multifd.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559
  1. /*
  2. * Multifd common code
  3. *
  4. * Copyright (c) 2019-2020 Red Hat Inc
  5. *
  6. * Authors:
  7. * Juan Quintela <quintela@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10. * See the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "qemu/cutils.h"
  14. #include "qemu/iov.h"
  15. #include "qemu/rcu.h"
  16. #include "exec/target_page.h"
  17. #include "system/system.h"
  18. #include "exec/ramblock.h"
  19. #include "qemu/error-report.h"
  20. #include "qapi/error.h"
  21. #include "file.h"
  22. #include "migration/misc.h"
  23. #include "migration.h"
  24. #include "migration-stats.h"
  25. #include "savevm.h"
  26. #include "socket.h"
  27. #include "tls.h"
  28. #include "qemu-file.h"
  29. #include "trace.h"
  30. #include "multifd.h"
  31. #include "threadinfo.h"
  32. #include "options.h"
  33. #include "qemu/yank.h"
  34. #include "io/channel-file.h"
  35. #include "io/channel-socket.h"
  36. #include "yank_functions.h"
  /* Multiple fd's */

  /*
   * Magic and version values.  They are written big-endian into the initial
   * per-channel message and into the header of every packet filled by
   * multifd_send_fill_packet(); the receive side rejects anything else.
   */
  #define MULTIFD_MAGIC 0x11223344U
  #define MULTIFD_VERSION 1

  /*
   * Initial message exchanged on a channel (see
   * multifd_send_initial_packet() / multifd_recv_initial_packet()).
   * The struct is packed and is written to / read from the channel as-is,
   * so the field order and sizes below are the wire layout.
   */
  typedef struct {
      uint32_t magic;             /* MULTIFD_MAGIC, big-endian on the wire */
      uint32_t version;           /* MULTIFD_VERSION, big-endian on the wire */
      unsigned char uuid[16];     /* QemuUUID */
      uint8_t id;                 /* channel id */
      uint8_t unused1[7];         /* Reserved for future use */
      uint64_t unused2[4];        /* Reserved for future use */
  } __attribute__((packed)) MultiFDInit_t;
  48. struct {
  49. MultiFDSendParams *params;
  50. /* multifd_send() body is not thread safe, needs serialization */
  51. QemuMutex multifd_send_mutex;
  52. /*
  53. * Global number of generated multifd packets.
  54. *
  55. * Note that we used 'uintptr_t' because it'll naturally support atomic
  56. * operations on both 32bit / 64 bits hosts. It means on 32bit systems
  57. * multifd will overflow the packet_num easier, but that should be
  58. * fine.
  59. *
  60. * Another option is to use QEMU's Stat64 then it'll be 64 bits on all
  61. * hosts, however so far it does not support atomic fetch_add() yet.
  62. * Make it easy for now.
  63. */
  64. uintptr_t packet_num;
  65. /*
  66. * Synchronization point past which no more channels will be
  67. * created.
  68. */
  69. QemuSemaphore channels_created;
  70. /* send channels ready */
  71. QemuSemaphore channels_ready;
  72. /*
  73. * Have we already run terminate threads. There is a race when it
  74. * happens that we got one error while we are exiting.
  75. * We will use atomic operations. Only valid values are 0 and 1.
  76. */
  77. int exiting;
  78. /* multifd ops */
  79. const MultiFDMethods *ops;
  80. } *multifd_send_state;
  81. struct {
  82. MultiFDRecvParams *params;
  83. MultiFDRecvData *data;
  84. /* number of created threads */
  85. int count;
  86. /*
  87. * This is always posted by the recv threads, the migration thread
  88. * uses it to wait for recv threads to finish assigned tasks.
  89. */
  90. QemuSemaphore sem_sync;
  91. /* global number of generated multifd packets */
  92. uint64_t packet_num;
  93. int exiting;
  94. /* multifd ops */
  95. const MultiFDMethods *ops;
  96. } *multifd_recv_state;
  97. MultiFDSendData *multifd_send_data_alloc(void)
  98. {
  99. MultiFDSendData *new = g_new0(MultiFDSendData, 1);
  100. multifd_ram_payload_alloc(&new->u.ram);
  101. /* Device state allocates its payload on-demand */
  102. return new;
  103. }
  104. void multifd_send_data_clear(MultiFDSendData *data)
  105. {
  106. if (multifd_payload_empty(data)) {
  107. return;
  108. }
  109. switch (data->type) {
  110. case MULTIFD_PAYLOAD_DEVICE_STATE:
  111. multifd_send_data_clear_device_state(&data->u.device_state);
  112. break;
  113. default:
  114. /* Nothing to do */
  115. break;
  116. }
  117. data->type = MULTIFD_PAYLOAD_NONE;
  118. }
  119. void multifd_send_data_free(MultiFDSendData *data)
  120. {
  121. if (!data) {
  122. return;
  123. }
  124. /* This also free's device state payload */
  125. multifd_send_data_clear(data);
  126. multifd_ram_payload_free(&data->u.ram);
  127. g_free(data);
  128. }
  129. static bool multifd_use_packets(void)
  130. {
  131. return !migrate_mapped_ram();
  132. }
  133. void multifd_send_channel_created(void)
  134. {
  135. qemu_sem_post(&multifd_send_state->channels_created);
  136. }
  137. static const MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = {};
  138. void multifd_register_ops(int method, const MultiFDMethods *ops)
  139. {
  140. assert(0 <= method && method < MULTIFD_COMPRESSION__MAX);
  141. assert(!multifd_ops[method]);
  142. multifd_ops[method] = ops;
  143. }
  144. static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
  145. {
  146. MultiFDInit_t msg = {};
  147. size_t size = sizeof(msg);
  148. int ret;
  149. msg.magic = cpu_to_be32(MULTIFD_MAGIC);
  150. msg.version = cpu_to_be32(MULTIFD_VERSION);
  151. msg.id = p->id;
  152. memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
  153. ret = qio_channel_write_all(p->c, (char *)&msg, size, errp);
  154. if (ret != 0) {
  155. return -1;
  156. }
  157. stat64_add(&mig_stats.multifd_bytes, size);
  158. return 0;
  159. }
  160. static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
  161. {
  162. MultiFDInit_t msg;
  163. int ret;
  164. ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
  165. if (ret != 0) {
  166. return -1;
  167. }
  168. msg.magic = be32_to_cpu(msg.magic);
  169. msg.version = be32_to_cpu(msg.version);
  170. if (msg.magic != MULTIFD_MAGIC) {
  171. error_setg(errp, "multifd: received packet magic %x "
  172. "expected %x", msg.magic, MULTIFD_MAGIC);
  173. return -1;
  174. }
  175. if (msg.version != MULTIFD_VERSION) {
  176. error_setg(errp, "multifd: received packet version %u "
  177. "expected %u", msg.version, MULTIFD_VERSION);
  178. return -1;
  179. }
  180. if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
  181. char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
  182. char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);
  183. error_setg(errp, "multifd: received uuid '%s' and expected "
  184. "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
  185. g_free(uuid);
  186. g_free(msg_uuid);
  187. return -1;
  188. }
  189. if (msg.id > migrate_multifd_channels()) {
  190. error_setg(errp, "multifd: received channel id %u is greater than "
  191. "number of channels %u", msg.id, migrate_multifd_channels());
  192. return -1;
  193. }
  194. return msg.id;
  195. }
  196. /* Fills a RAM multifd packet */
  197. void multifd_send_fill_packet(MultiFDSendParams *p)
  198. {
  199. MultiFDPacket_t *packet = p->packet;
  200. uint64_t packet_num;
  201. bool sync_packet = p->flags & MULTIFD_FLAG_SYNC;
  202. memset(packet, 0, p->packet_len);
  203. packet->hdr.magic = cpu_to_be32(MULTIFD_MAGIC);
  204. packet->hdr.version = cpu_to_be32(MULTIFD_VERSION);
  205. packet->hdr.flags = cpu_to_be32(p->flags);
  206. packet->next_packet_size = cpu_to_be32(p->next_packet_size);
  207. packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num);
  208. packet->packet_num = cpu_to_be64(packet_num);
  209. p->packets_sent++;
  210. if (!sync_packet) {
  211. multifd_ram_fill_packet(p);
  212. }
  213. trace_multifd_send_fill(p->id, packet_num,
  214. p->flags, p->next_packet_size);
  215. }
  216. static int multifd_recv_unfill_packet_header(MultiFDRecvParams *p,
  217. const MultiFDPacketHdr_t *hdr,
  218. Error **errp)
  219. {
  220. uint32_t magic = be32_to_cpu(hdr->magic);
  221. uint32_t version = be32_to_cpu(hdr->version);
  222. if (magic != MULTIFD_MAGIC) {
  223. error_setg(errp, "multifd: received packet magic %x, expected %x",
  224. magic, MULTIFD_MAGIC);
  225. return -1;
  226. }
  227. if (version != MULTIFD_VERSION) {
  228. error_setg(errp, "multifd: received packet version %u, expected %u",
  229. version, MULTIFD_VERSION);
  230. return -1;
  231. }
  232. p->flags = be32_to_cpu(hdr->flags);
  233. return 0;
  234. }
  235. static int multifd_recv_unfill_packet_device_state(MultiFDRecvParams *p,
  236. Error **errp)
  237. {
  238. MultiFDPacketDeviceState_t *packet = p->packet_dev_state;
  239. packet->instance_id = be32_to_cpu(packet->instance_id);
  240. p->next_packet_size = be32_to_cpu(packet->next_packet_size);
  241. return 0;
  242. }
  243. static int multifd_recv_unfill_packet_ram(MultiFDRecvParams *p, Error **errp)
  244. {
  245. const MultiFDPacket_t *packet = p->packet;
  246. int ret = 0;
  247. p->next_packet_size = be32_to_cpu(packet->next_packet_size);
  248. p->packet_num = be64_to_cpu(packet->packet_num);
  249. /* Always unfill, old QEMUs (<9.0) send data along with SYNC */
  250. ret = multifd_ram_unfill_packet(p, errp);
  251. trace_multifd_recv_unfill(p->id, p->packet_num, p->flags,
  252. p->next_packet_size);
  253. return ret;
  254. }
  255. static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
  256. {
  257. p->packets_recved++;
  258. if (p->flags & MULTIFD_FLAG_DEVICE_STATE) {
  259. return multifd_recv_unfill_packet_device_state(p, errp);
  260. }
  261. return multifd_recv_unfill_packet_ram(p, errp);
  262. }
  263. static bool multifd_send_should_exit(void)
  264. {
  265. return qatomic_read(&multifd_send_state->exiting);
  266. }
  267. static bool multifd_recv_should_exit(void)
  268. {
  269. return qatomic_read(&multifd_recv_state->exiting);
  270. }
  271. /*
  272. * The migration thread can wait on either of the two semaphores. This
  273. * function can be used to kick the main thread out of waiting on either of
  274. * them. Should mostly only be called when something wrong happened with
  275. * the current multifd send thread.
  276. */
  277. static void multifd_send_kick_main(MultiFDSendParams *p)
  278. {
  279. qemu_sem_post(&p->sem_sync);
  280. qemu_sem_post(&multifd_send_state->channels_ready);
  281. }
  282. /*
  283. * multifd_send() works by exchanging the MultiFDSendData object
  284. * provided by the caller with an unused MultiFDSendData object from
  285. * the next channel that is found to be idle.
  286. *
  287. * The channel owns the data until it finishes transmitting and the
  288. * caller owns the empty object until it fills it with data and calls
  289. * this function again. No locking necessary.
  290. *
  291. * Switching is safe because both the migration thread and the channel
  292. * thread have barriers in place to serialize access.
  293. *
  294. * Returns true if succeed, false otherwise.
  295. */
  296. bool multifd_send(MultiFDSendData **send_data)
  297. {
  298. int i;
  299. static int next_channel;
  300. MultiFDSendParams *p = NULL; /* make happy gcc */
  301. MultiFDSendData *tmp;
  302. if (multifd_send_should_exit()) {
  303. return false;
  304. }
  305. QEMU_LOCK_GUARD(&multifd_send_state->multifd_send_mutex);
  306. /* We wait here, until at least one channel is ready */
  307. qemu_sem_wait(&multifd_send_state->channels_ready);
  308. /*
  309. * next_channel can remain from a previous migration that was
  310. * using more channels, so ensure it doesn't overflow if the
  311. * limit is lower now.
  312. */
  313. next_channel %= migrate_multifd_channels();
  314. for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) {
  315. if (multifd_send_should_exit()) {
  316. return false;
  317. }
  318. p = &multifd_send_state->params[i];
  319. /*
  320. * Lockless read to p->pending_job is safe, because only multifd
  321. * sender thread can clear it.
  322. */
  323. if (qatomic_read(&p->pending_job) == false) {
  324. next_channel = (i + 1) % migrate_multifd_channels();
  325. break;
  326. }
  327. }
  328. /*
  329. * Make sure we read p->pending_job before all the rest. Pairs with
  330. * qatomic_store_release() in multifd_send_thread().
  331. */
  332. smp_mb_acquire();
  333. assert(multifd_payload_empty(p->data));
  334. /*
  335. * Swap the pointers. The channel gets the client data for
  336. * transferring and the client gets back an unused data slot.
  337. */
  338. tmp = *send_data;
  339. *send_data = p->data;
  340. p->data = tmp;
  341. /*
  342. * Making sure p->data is setup before marking pending_job=true. Pairs
  343. * with the qatomic_load_acquire() in multifd_send_thread().
  344. */
  345. qatomic_store_release(&p->pending_job, true);
  346. qemu_sem_post(&p->sem);
  347. return true;
  348. }
  349. /* Multifd send side hit an error; remember it and prepare to quit */
  350. static void multifd_send_set_error(Error *err)
  351. {
  352. /*
  353. * We don't want to exit each threads twice. Depending on where
  354. * we get the error, or if there are two independent errors in two
  355. * threads at the same time, we can end calling this function
  356. * twice.
  357. */
  358. if (qatomic_xchg(&multifd_send_state->exiting, 1)) {
  359. return;
  360. }
  361. if (err) {
  362. MigrationState *s = migrate_get_current();
  363. migrate_set_error(s, err);
  364. if (s->state == MIGRATION_STATUS_SETUP ||
  365. s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
  366. s->state == MIGRATION_STATUS_DEVICE ||
  367. s->state == MIGRATION_STATUS_ACTIVE) {
  368. migrate_set_state(&s->state, s->state,
  369. MIGRATION_STATUS_FAILED);
  370. }
  371. }
  372. }
  373. static void multifd_send_terminate_threads(void)
  374. {
  375. int i;
  376. trace_multifd_send_terminate_threads();
  377. /*
  378. * Tell everyone we're quitting. No xchg() needed here; we simply
  379. * always set it.
  380. */
  381. qatomic_set(&multifd_send_state->exiting, 1);
  382. /*
  383. * Firstly, kick all threads out; no matter whether they are just idle,
  384. * or blocked in an IO system call.
  385. */
  386. for (i = 0; i < migrate_multifd_channels(); i++) {
  387. MultiFDSendParams *p = &multifd_send_state->params[i];
  388. qemu_sem_post(&p->sem);
  389. if (p->c) {
  390. qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
  391. }
  392. }
  393. /*
  394. * Finally recycle all the threads.
  395. */
  396. for (i = 0; i < migrate_multifd_channels(); i++) {
  397. MultiFDSendParams *p = &multifd_send_state->params[i];
  398. if (p->tls_thread_created) {
  399. qemu_thread_join(&p->tls_thread);
  400. }
  401. if (p->thread_created) {
  402. qemu_thread_join(&p->thread);
  403. }
  404. }
  405. }
  406. static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp)
  407. {
  408. if (p->c) {
  409. migration_ioc_unregister_yank(p->c);
  410. /*
  411. * The object_unref() cannot guarantee the fd will always be
  412. * released because finalize() of the iochannel is only
  413. * triggered on the last reference and it's not guaranteed
  414. * that we always hold the last refcount when reaching here.
  415. *
  416. * Closing the fd explicitly has the benefit that if there is any
  417. * registered I/O handler callbacks on such fd, that will get a
  418. * POLLNVAL event and will further trigger the cleanup to finally
  419. * release the IOC.
  420. *
  421. * FIXME: It should logically be guaranteed that all multifd
  422. * channels have no I/O handler callback registered when reaching
  423. * here, because migration thread will wait for all multifd channel
  424. * establishments to complete during setup. Since
  425. * migration_cleanup() will be scheduled in main thread too, all
  426. * previous callbacks should guarantee to be completed when
  427. * reaching here. See multifd_send_state.channels_created and its
  428. * usage. In the future, we could replace this with an assert
  429. * making sure we're the last reference, or simply drop it if above
  430. * is more clear to be justified.
  431. */
  432. qio_channel_close(p->c, &error_abort);
  433. object_unref(OBJECT(p->c));
  434. p->c = NULL;
  435. }
  436. qemu_sem_destroy(&p->sem);
  437. qemu_sem_destroy(&p->sem_sync);
  438. g_free(p->name);
  439. p->name = NULL;
  440. g_clear_pointer(&p->data, multifd_send_data_free);
  441. p->packet_len = 0;
  442. g_clear_pointer(&p->packet_device_state, g_free);
  443. g_free(p->packet);
  444. p->packet = NULL;
  445. multifd_send_state->ops->send_cleanup(p, errp);
  446. assert(!p->iov);
  447. return *errp == NULL;
  448. }
  449. static void multifd_send_cleanup_state(void)
  450. {
  451. file_cleanup_outgoing_migration();
  452. socket_cleanup_outgoing_migration();
  453. multifd_device_state_send_cleanup();
  454. qemu_sem_destroy(&multifd_send_state->channels_created);
  455. qemu_sem_destroy(&multifd_send_state->channels_ready);
  456. qemu_mutex_destroy(&multifd_send_state->multifd_send_mutex);
  457. g_free(multifd_send_state->params);
  458. multifd_send_state->params = NULL;
  459. g_free(multifd_send_state);
  460. multifd_send_state = NULL;
  461. }
  462. void multifd_send_shutdown(void)
  463. {
  464. int i;
  465. if (!migrate_multifd()) {
  466. return;
  467. }
  468. for (i = 0; i < migrate_multifd_channels(); i++) {
  469. MultiFDSendParams *p = &multifd_send_state->params[i];
  470. /* thread_created implies the TLS handshake has succeeded */
  471. if (p->tls_thread_created && p->thread_created) {
  472. Error *local_err = NULL;
  473. /*
  474. * The destination expects the TLS session to always be
  475. * properly terminated. This helps to detect a premature
  476. * termination in the middle of the stream. Note that
  477. * older QEMUs always break the connection on the source
  478. * and the destination always sees
  479. * GNUTLS_E_PREMATURE_TERMINATION.
  480. */
  481. migration_tls_channel_end(p->c, &local_err);
  482. /*
  483. * The above can return an error in case the migration has
  484. * already failed. If the migration succeeded, errors are
  485. * not expected but there's no need to kill the source.
  486. */
  487. if (local_err && !migration_has_failed(migrate_get_current())) {
  488. warn_report(
  489. "multifd_send_%d: Failed to terminate TLS connection: %s",
  490. p->id, error_get_pretty(local_err));
  491. break;
  492. }
  493. }
  494. }
  495. multifd_send_terminate_threads();
  496. for (i = 0; i < migrate_multifd_channels(); i++) {
  497. MultiFDSendParams *p = &multifd_send_state->params[i];
  498. Error *local_err = NULL;
  499. if (!multifd_send_cleanup_channel(p, &local_err)) {
  500. migrate_set_error(migrate_get_current(), local_err);
  501. error_free(local_err);
  502. }
  503. }
  504. multifd_send_cleanup_state();
  505. }
  506. static int multifd_zero_copy_flush(QIOChannel *c)
  507. {
  508. int ret;
  509. Error *err = NULL;
  510. ret = qio_channel_flush(c, &err);
  511. if (ret < 0) {
  512. error_report_err(err);
  513. return -1;
  514. }
  515. if (ret == 1) {
  516. stat64_add(&mig_stats.dirty_sync_missed_zero_copy, 1);
  517. }
  518. return ret;
  519. }
  520. int multifd_send_sync_main(MultiFDSyncReq req)
  521. {
  522. int i;
  523. bool flush_zero_copy;
  524. assert(req != MULTIFD_SYNC_NONE);
  525. flush_zero_copy = migrate_zero_copy_send();
  526. for (i = 0; i < migrate_multifd_channels(); i++) {
  527. MultiFDSendParams *p = &multifd_send_state->params[i];
  528. if (multifd_send_should_exit()) {
  529. return -1;
  530. }
  531. trace_multifd_send_sync_main_signal(p->id);
  532. /*
  533. * We should be the only user so far, so not possible to be set by
  534. * others concurrently.
  535. */
  536. assert(qatomic_read(&p->pending_sync) == MULTIFD_SYNC_NONE);
  537. qatomic_set(&p->pending_sync, req);
  538. qemu_sem_post(&p->sem);
  539. }
  540. for (i = 0; i < migrate_multifd_channels(); i++) {
  541. MultiFDSendParams *p = &multifd_send_state->params[i];
  542. if (multifd_send_should_exit()) {
  543. return -1;
  544. }
  545. qemu_sem_wait(&multifd_send_state->channels_ready);
  546. trace_multifd_send_sync_main_wait(p->id);
  547. qemu_sem_wait(&p->sem_sync);
  548. if (flush_zero_copy && p->c && (multifd_zero_copy_flush(p->c) < 0)) {
  549. return -1;
  550. }
  551. }
  552. trace_multifd_send_sync_main(multifd_send_state->packet_num);
  553. return 0;
  554. }
  555. static void *multifd_send_thread(void *opaque)
  556. {
  557. MultiFDSendParams *p = opaque;
  558. MigrationThread *thread = NULL;
  559. Error *local_err = NULL;
  560. int ret = 0;
  561. bool use_packets = multifd_use_packets();
  562. thread = migration_threads_add(p->name, qemu_get_thread_id());
  563. trace_multifd_send_thread_start(p->id);
  564. rcu_register_thread();
  565. if (use_packets) {
  566. if (multifd_send_initial_packet(p, &local_err) < 0) {
  567. ret = -1;
  568. goto out;
  569. }
  570. }
  571. while (true) {
  572. qemu_sem_post(&multifd_send_state->channels_ready);
  573. qemu_sem_wait(&p->sem);
  574. if (multifd_send_should_exit()) {
  575. break;
  576. }
  577. /*
  578. * Read pending_job flag before p->data. Pairs with the
  579. * qatomic_store_release() in multifd_send().
  580. */
  581. if (qatomic_load_acquire(&p->pending_job)) {
  582. bool is_device_state = multifd_payload_device_state(p->data);
  583. size_t total_size;
  584. int write_flags_masked = 0;
  585. p->flags = 0;
  586. p->iovs_num = 0;
  587. assert(!multifd_payload_empty(p->data));
  588. if (is_device_state) {
  589. multifd_device_state_send_prepare(p);
  590. /* Device state packets cannot be sent via zerocopy */
  591. write_flags_masked |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
  592. } else {
  593. ret = multifd_send_state->ops->send_prepare(p, &local_err);
  594. if (ret != 0) {
  595. break;
  596. }
  597. }
  598. /*
  599. * The packet header in the zerocopy RAM case is accounted for
  600. * in multifd_nocomp_send_prepare() - where it is actually
  601. * being sent.
  602. */
  603. total_size = iov_size(p->iov, p->iovs_num);
  604. if (migrate_mapped_ram()) {
  605. assert(!is_device_state);
  606. ret = file_write_ramblock_iov(p->c, p->iov, p->iovs_num,
  607. &p->data->u.ram, &local_err);
  608. } else {
  609. ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num,
  610. NULL, 0,
  611. p->write_flags & ~write_flags_masked,
  612. &local_err);
  613. }
  614. if (ret != 0) {
  615. break;
  616. }
  617. stat64_add(&mig_stats.multifd_bytes, total_size);
  618. p->next_packet_size = 0;
  619. multifd_send_data_clear(p->data);
  620. /*
  621. * Making sure p->data is published before saying "we're
  622. * free". Pairs with the smp_mb_acquire() in
  623. * multifd_send().
  624. */
  625. qatomic_store_release(&p->pending_job, false);
  626. } else {
  627. MultiFDSyncReq req = qatomic_read(&p->pending_sync);
  628. /*
  629. * If not a normal job, must be a sync request. Note that
  630. * pending_sync is a standalone flag (unlike pending_job), so
  631. * it doesn't require explicit memory barriers.
  632. */
  633. assert(req != MULTIFD_SYNC_NONE);
  634. /* Only push the SYNC message if it involves a remote sync */
  635. if (req == MULTIFD_SYNC_ALL) {
  636. p->flags = MULTIFD_FLAG_SYNC;
  637. multifd_send_fill_packet(p);
  638. ret = qio_channel_write_all(p->c, (void *)p->packet,
  639. p->packet_len, &local_err);
  640. if (ret != 0) {
  641. break;
  642. }
  643. /* p->next_packet_size will always be zero for a SYNC packet */
  644. stat64_add(&mig_stats.multifd_bytes, p->packet_len);
  645. }
  646. qatomic_set(&p->pending_sync, MULTIFD_SYNC_NONE);
  647. qemu_sem_post(&p->sem_sync);
  648. }
  649. }
  650. out:
  651. if (ret) {
  652. assert(local_err);
  653. trace_multifd_send_error(p->id);
  654. multifd_send_set_error(local_err);
  655. multifd_send_kick_main(p);
  656. error_free(local_err);
  657. }
  658. rcu_unregister_thread();
  659. migration_threads_remove(thread);
  660. trace_multifd_send_thread_end(p->id, p->packets_sent);
  661. return NULL;
  662. }
  663. static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque);
  664. typedef struct {
  665. MultiFDSendParams *p;
  666. QIOChannelTLS *tioc;
  667. } MultiFDTLSThreadArgs;
  668. static void *multifd_tls_handshake_thread(void *opaque)
  669. {
  670. MultiFDTLSThreadArgs *args = opaque;
  671. qio_channel_tls_handshake(args->tioc,
  672. multifd_new_send_channel_async,
  673. args->p,
  674. NULL,
  675. NULL);
  676. g_free(args);
  677. return NULL;
  678. }
  679. static bool multifd_tls_channel_connect(MultiFDSendParams *p,
  680. QIOChannel *ioc,
  681. Error **errp)
  682. {
  683. MigrationState *s = migrate_get_current();
  684. const char *hostname = s->hostname;
  685. MultiFDTLSThreadArgs *args;
  686. QIOChannelTLS *tioc;
  687. tioc = migration_tls_client_create(ioc, hostname, errp);
  688. if (!tioc) {
  689. return false;
  690. }
  691. /*
  692. * Ownership of the socket channel now transfers to the newly
  693. * created TLS channel, which has already taken a reference.
  694. */
  695. object_unref(OBJECT(ioc));
  696. trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname);
  697. qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing");
  698. args = g_new0(MultiFDTLSThreadArgs, 1);
  699. args->tioc = tioc;
  700. args->p = p;
  701. p->tls_thread_created = true;
  702. qemu_thread_create(&p->tls_thread, MIGRATION_THREAD_SRC_TLS,
  703. multifd_tls_handshake_thread, args,
  704. QEMU_THREAD_JOINABLE);
  705. return true;
  706. }
  707. void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc)
  708. {
  709. qio_channel_set_delay(ioc, false);
  710. migration_ioc_register_yank(ioc);
  711. /* Setup p->c only if the channel is completely setup */
  712. p->c = ioc;
  713. p->thread_created = true;
  714. qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
  715. QEMU_THREAD_JOINABLE);
  716. }
  717. /*
  718. * When TLS is enabled this function is called once to establish the
  719. * TLS connection and a second time after the TLS handshake to create
  720. * the multifd channel. Without TLS it goes straight into the channel
  721. * creation.
  722. */
  723. static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
  724. {
  725. MultiFDSendParams *p = opaque;
  726. QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
  727. Error *local_err = NULL;
  728. bool ret;
  729. trace_multifd_new_send_channel_async(p->id);
  730. if (qio_task_propagate_error(task, &local_err)) {
  731. ret = false;
  732. goto out;
  733. }
  734. trace_multifd_set_outgoing_channel(ioc, object_get_typename(OBJECT(ioc)),
  735. migrate_get_current()->hostname);
  736. if (migrate_channel_requires_tls_upgrade(ioc)) {
  737. ret = multifd_tls_channel_connect(p, ioc, &local_err);
  738. if (ret) {
  739. return;
  740. }
  741. } else {
  742. multifd_channel_connect(p, ioc);
  743. ret = true;
  744. }
  745. out:
  746. /*
  747. * Here we're not interested whether creation succeeded, only that
  748. * it happened at all.
  749. */
  750. multifd_send_channel_created();
  751. if (ret) {
  752. return;
  753. }
  754. trace_multifd_new_send_channel_async_error(p->id, local_err);
  755. multifd_send_set_error(local_err);
  756. /*
  757. * For error cases (TLS or non-TLS), IO channel is always freed here
  758. * rather than when cleanup multifd: since p->c is not set, multifd
  759. * cleanup code doesn't even know its existence.
  760. */
  761. object_unref(OBJECT(ioc));
  762. error_free(local_err);
  763. }
  764. static bool multifd_new_send_channel_create(gpointer opaque, Error **errp)
  765. {
  766. if (!multifd_use_packets()) {
  767. return file_send_channel_create(opaque, errp);
  768. }
  769. socket_send_channel_create(multifd_new_send_channel_async, opaque);
  770. return true;
  771. }
  772. bool multifd_send_setup(void)
  773. {
  774. MigrationState *s = migrate_get_current();
  775. int thread_count, ret = 0;
  776. uint32_t page_count = multifd_ram_page_count();
  777. bool use_packets = multifd_use_packets();
  778. uint8_t i;
  779. if (!migrate_multifd()) {
  780. return true;
  781. }
  782. thread_count = migrate_multifd_channels();
  783. multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
  784. multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
  785. qemu_mutex_init(&multifd_send_state->multifd_send_mutex);
  786. qemu_sem_init(&multifd_send_state->channels_created, 0);
  787. qemu_sem_init(&multifd_send_state->channels_ready, 0);
  788. qatomic_set(&multifd_send_state->exiting, 0);
  789. multifd_send_state->ops = multifd_ops[migrate_multifd_compression()];
  790. for (i = 0; i < thread_count; i++) {
  791. MultiFDSendParams *p = &multifd_send_state->params[i];
  792. Error *local_err = NULL;
  793. qemu_sem_init(&p->sem, 0);
  794. qemu_sem_init(&p->sem_sync, 0);
  795. p->id = i;
  796. p->data = multifd_send_data_alloc();
  797. if (use_packets) {
  798. p->packet_len = sizeof(MultiFDPacket_t)
  799. + sizeof(uint64_t) * page_count;
  800. p->packet = g_malloc0(p->packet_len);
  801. p->packet_device_state = g_malloc0(sizeof(*p->packet_device_state));
  802. p->packet_device_state->hdr.magic = cpu_to_be32(MULTIFD_MAGIC);
  803. p->packet_device_state->hdr.version = cpu_to_be32(MULTIFD_VERSION);
  804. }
  805. p->name = g_strdup_printf(MIGRATION_THREAD_SRC_MULTIFD, i);
  806. p->write_flags = 0;
  807. if (!multifd_new_send_channel_create(p, &local_err)) {
  808. migrate_set_error(s, local_err);
  809. ret = -1;
  810. }
  811. }
  812. /*
  813. * Wait until channel creation has started for all channels. The
  814. * creation can still fail, but no more channels will be created
  815. * past this point.
  816. */
  817. for (i = 0; i < thread_count; i++) {
  818. qemu_sem_wait(&multifd_send_state->channels_created);
  819. }
  820. if (ret) {
  821. goto err;
  822. }
  823. for (i = 0; i < thread_count; i++) {
  824. MultiFDSendParams *p = &multifd_send_state->params[i];
  825. Error *local_err = NULL;
  826. ret = multifd_send_state->ops->send_setup(p, &local_err);
  827. if (ret) {
  828. migrate_set_error(s, local_err);
  829. goto err;
  830. }
  831. assert(p->iov);
  832. }
  833. multifd_device_state_send_setup();
  834. return true;
  835. err:
  836. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
  837. MIGRATION_STATUS_FAILED);
  838. return false;
  839. }
  840. bool multifd_recv(void)
  841. {
  842. int i;
  843. static int next_recv_channel;
  844. MultiFDRecvParams *p = NULL;
  845. MultiFDRecvData *data = multifd_recv_state->data;
  846. /*
  847. * next_channel can remain from a previous migration that was
  848. * using more channels, so ensure it doesn't overflow if the
  849. * limit is lower now.
  850. */
  851. next_recv_channel %= migrate_multifd_channels();
  852. for (i = next_recv_channel;; i = (i + 1) % migrate_multifd_channels()) {
  853. if (multifd_recv_should_exit()) {
  854. return false;
  855. }
  856. p = &multifd_recv_state->params[i];
  857. if (qatomic_read(&p->pending_job) == false) {
  858. next_recv_channel = (i + 1) % migrate_multifd_channels();
  859. break;
  860. }
  861. }
  862. /*
  863. * Order pending_job read before manipulating p->data below. Pairs
  864. * with qatomic_store_release() at multifd_recv_thread().
  865. */
  866. smp_mb_acquire();
  867. assert(!p->data->size);
  868. multifd_recv_state->data = p->data;
  869. p->data = data;
  870. /*
  871. * Order p->data update before setting pending_job. Pairs with
  872. * qatomic_load_acquire() at multifd_recv_thread().
  873. */
  874. qatomic_store_release(&p->pending_job, true);
  875. qemu_sem_post(&p->sem);
  876. return true;
  877. }
  878. MultiFDRecvData *multifd_get_recv_data(void)
  879. {
  880. return multifd_recv_state->data;
  881. }
  882. static void multifd_recv_terminate_threads(Error *err)
  883. {
  884. int i;
  885. trace_multifd_recv_terminate_threads(err != NULL);
  886. if (qatomic_xchg(&multifd_recv_state->exiting, 1)) {
  887. return;
  888. }
  889. if (err) {
  890. MigrationState *s = migrate_get_current();
  891. migrate_set_error(s, err);
  892. if (s->state == MIGRATION_STATUS_SETUP ||
  893. s->state == MIGRATION_STATUS_ACTIVE) {
  894. migrate_set_state(&s->state, s->state,
  895. MIGRATION_STATUS_FAILED);
  896. }
  897. }
  898. for (i = 0; i < migrate_multifd_channels(); i++) {
  899. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  900. /*
  901. * The migration thread and channels interact differently
  902. * depending on the presence of packets.
  903. */
  904. if (multifd_use_packets()) {
  905. /*
  906. * The channel receives as long as there are packets. When
  907. * packets end (i.e. MULTIFD_FLAG_SYNC is reached), the
  908. * channel waits for the migration thread to sync. If the
  909. * sync never happens, do it here.
  910. */
  911. qemu_sem_post(&p->sem_sync);
  912. } else {
  913. /*
  914. * The channel waits for the migration thread to give it
  915. * work. When the migration thread runs out of work, it
  916. * releases the channel and waits for any pending work to
  917. * finish. If we reach here (e.g. due to error) before the
  918. * work runs out, release the channel.
  919. */
  920. qemu_sem_post(&p->sem);
  921. }
  922. /*
  923. * We could arrive here for two reasons:
  924. * - normal quit, i.e. everything went fine, just finished
  925. * - error quit: We close the channels so the channel threads
  926. * finish the qio_channel_read_all_eof()
  927. */
  928. if (p->c) {
  929. qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
  930. }
  931. }
  932. }
  933. void multifd_recv_shutdown(void)
  934. {
  935. if (migrate_multifd()) {
  936. multifd_recv_terminate_threads(NULL);
  937. }
  938. }
  939. static void multifd_recv_cleanup_channel(MultiFDRecvParams *p)
  940. {
  941. migration_ioc_unregister_yank(p->c);
  942. object_unref(OBJECT(p->c));
  943. p->c = NULL;
  944. qemu_mutex_destroy(&p->mutex);
  945. qemu_sem_destroy(&p->sem_sync);
  946. qemu_sem_destroy(&p->sem);
  947. g_free(p->data);
  948. p->data = NULL;
  949. g_free(p->name);
  950. p->name = NULL;
  951. p->packet_len = 0;
  952. g_free(p->packet);
  953. p->packet = NULL;
  954. g_clear_pointer(&p->packet_dev_state, g_free);
  955. g_free(p->normal);
  956. p->normal = NULL;
  957. g_free(p->zero);
  958. p->zero = NULL;
  959. multifd_recv_state->ops->recv_cleanup(p);
  960. }
  961. static void multifd_recv_cleanup_state(void)
  962. {
  963. qemu_sem_destroy(&multifd_recv_state->sem_sync);
  964. g_free(multifd_recv_state->params);
  965. multifd_recv_state->params = NULL;
  966. g_free(multifd_recv_state->data);
  967. multifd_recv_state->data = NULL;
  968. g_free(multifd_recv_state);
  969. multifd_recv_state = NULL;
  970. }
  971. void multifd_recv_cleanup(void)
  972. {
  973. int i;
  974. if (!migrate_multifd()) {
  975. return;
  976. }
  977. multifd_recv_terminate_threads(NULL);
  978. for (i = 0; i < migrate_multifd_channels(); i++) {
  979. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  980. if (p->thread_created) {
  981. qemu_thread_join(&p->thread);
  982. }
  983. }
  984. for (i = 0; i < migrate_multifd_channels(); i++) {
  985. multifd_recv_cleanup_channel(&multifd_recv_state->params[i]);
  986. }
  987. multifd_recv_cleanup_state();
  988. }
  989. void multifd_recv_sync_main(void)
  990. {
  991. int thread_count = migrate_multifd_channels();
  992. bool file_based = !multifd_use_packets();
  993. int i;
  994. if (!migrate_multifd()) {
  995. return;
  996. }
  997. /*
  998. * File-based channels don't use packets and therefore need to
  999. * wait for more work. Release them to start the sync.
  1000. */
  1001. if (file_based) {
  1002. for (i = 0; i < thread_count; i++) {
  1003. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  1004. trace_multifd_recv_sync_main_signal(p->id);
  1005. qemu_sem_post(&p->sem);
  1006. }
  1007. }
  1008. /*
  1009. * Initiate the synchronization by waiting for all channels.
  1010. *
  1011. * For socket-based migration this means each channel has received
  1012. * the SYNC packet on the stream.
  1013. *
  1014. * For file-based migration this means each channel is done with
  1015. * the work (pending_job=false).
  1016. */
  1017. for (i = 0; i < thread_count; i++) {
  1018. trace_multifd_recv_sync_main_wait(i);
  1019. qemu_sem_wait(&multifd_recv_state->sem_sync);
  1020. }
  1021. if (file_based) {
  1022. /*
  1023. * For file-based loading is done in one iteration. We're
  1024. * done.
  1025. */
  1026. return;
  1027. }
  1028. /*
  1029. * Sync done. Release the channels for the next iteration.
  1030. */
  1031. for (i = 0; i < thread_count; i++) {
  1032. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  1033. WITH_QEMU_LOCK_GUARD(&p->mutex) {
  1034. if (multifd_recv_state->packet_num < p->packet_num) {
  1035. multifd_recv_state->packet_num = p->packet_num;
  1036. }
  1037. }
  1038. trace_multifd_recv_sync_main_signal(p->id);
  1039. qemu_sem_post(&p->sem_sync);
  1040. }
  1041. trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
  1042. }
  1043. static int multifd_device_state_recv(MultiFDRecvParams *p, Error **errp)
  1044. {
  1045. g_autofree char *dev_state_buf = NULL;
  1046. int ret;
  1047. dev_state_buf = g_malloc(p->next_packet_size);
  1048. ret = qio_channel_read_all(p->c, dev_state_buf, p->next_packet_size, errp);
  1049. if (ret != 0) {
  1050. return ret;
  1051. }
  1052. if (p->packet_dev_state->idstr[sizeof(p->packet_dev_state->idstr) - 1]
  1053. != 0) {
  1054. error_setg(errp, "unterminated multifd device state idstr");
  1055. return -1;
  1056. }
  1057. if (!qemu_loadvm_load_state_buffer(p->packet_dev_state->idstr,
  1058. p->packet_dev_state->instance_id,
  1059. dev_state_buf, p->next_packet_size,
  1060. errp)) {
  1061. ret = -1;
  1062. }
  1063. return ret;
  1064. }
  1065. static void *multifd_recv_thread(void *opaque)
  1066. {
  1067. MigrationState *s = migrate_get_current();
  1068. MultiFDRecvParams *p = opaque;
  1069. Error *local_err = NULL;
  1070. bool use_packets = multifd_use_packets();
  1071. int ret;
  1072. trace_multifd_recv_thread_start(p->id);
  1073. rcu_register_thread();
  1074. if (!s->multifd_clean_tls_termination) {
  1075. p->read_flags = QIO_CHANNEL_READ_FLAG_RELAXED_EOF;
  1076. }
  1077. while (true) {
  1078. MultiFDPacketHdr_t hdr;
  1079. uint32_t flags = 0;
  1080. bool is_device_state = false;
  1081. bool has_data = false;
  1082. uint8_t *pkt_buf;
  1083. size_t pkt_len;
  1084. p->normal_num = 0;
  1085. if (use_packets) {
  1086. struct iovec iov = {
  1087. .iov_base = (void *)&hdr,
  1088. .iov_len = sizeof(hdr)
  1089. };
  1090. if (multifd_recv_should_exit()) {
  1091. break;
  1092. }
  1093. ret = qio_channel_readv_full_all_eof(p->c, &iov, 1, NULL, NULL,
  1094. p->read_flags, &local_err);
  1095. if (!ret) {
  1096. /* EOF */
  1097. assert(!local_err);
  1098. break;
  1099. }
  1100. if (ret == -1) {
  1101. break;
  1102. }
  1103. ret = multifd_recv_unfill_packet_header(p, &hdr, &local_err);
  1104. if (ret) {
  1105. break;
  1106. }
  1107. is_device_state = p->flags & MULTIFD_FLAG_DEVICE_STATE;
  1108. if (is_device_state) {
  1109. pkt_buf = (uint8_t *)p->packet_dev_state + sizeof(hdr);
  1110. pkt_len = sizeof(*p->packet_dev_state) - sizeof(hdr);
  1111. } else {
  1112. pkt_buf = (uint8_t *)p->packet + sizeof(hdr);
  1113. pkt_len = p->packet_len - sizeof(hdr);
  1114. }
  1115. ret = qio_channel_read_all_eof(p->c, (char *)pkt_buf, pkt_len,
  1116. &local_err);
  1117. if (!ret) {
  1118. /* EOF */
  1119. error_setg(&local_err, "multifd: unexpected EOF after packet header");
  1120. break;
  1121. }
  1122. if (ret == -1) {
  1123. break;
  1124. }
  1125. qemu_mutex_lock(&p->mutex);
  1126. ret = multifd_recv_unfill_packet(p, &local_err);
  1127. if (ret) {
  1128. qemu_mutex_unlock(&p->mutex);
  1129. break;
  1130. }
  1131. flags = p->flags;
  1132. /* recv methods don't know how to handle the SYNC flag */
  1133. p->flags &= ~MULTIFD_FLAG_SYNC;
  1134. if (is_device_state) {
  1135. has_data = p->next_packet_size > 0;
  1136. } else {
  1137. /*
  1138. * Even if it's a SYNC packet, this needs to be set
  1139. * because older QEMUs (<9.0) still send data along with
  1140. * the SYNC packet.
  1141. */
  1142. has_data = p->normal_num || p->zero_num;
  1143. }
  1144. qemu_mutex_unlock(&p->mutex);
  1145. } else {
  1146. /*
  1147. * No packets, so we need to wait for the vmstate code to
  1148. * give us work.
  1149. */
  1150. qemu_sem_wait(&p->sem);
  1151. if (multifd_recv_should_exit()) {
  1152. break;
  1153. }
  1154. /* pairs with qatomic_store_release() at multifd_recv() */
  1155. if (!qatomic_load_acquire(&p->pending_job)) {
  1156. /*
  1157. * Migration thread did not send work, this is
  1158. * equivalent to pending_sync on the sending
  1159. * side. Post sem_sync to notify we reached this
  1160. * point.
  1161. */
  1162. qemu_sem_post(&multifd_recv_state->sem_sync);
  1163. continue;
  1164. }
  1165. has_data = !!p->data->size;
  1166. }
  1167. if (has_data) {
  1168. if (is_device_state) {
  1169. assert(use_packets);
  1170. ret = multifd_device_state_recv(p, &local_err);
  1171. } else {
  1172. ret = multifd_recv_state->ops->recv(p, &local_err);
  1173. }
  1174. if (ret != 0) {
  1175. break;
  1176. }
  1177. } else if (is_device_state) {
  1178. error_setg(&local_err,
  1179. "multifd: received empty device state packet");
  1180. break;
  1181. }
  1182. if (use_packets) {
  1183. if (flags & MULTIFD_FLAG_SYNC) {
  1184. if (is_device_state) {
  1185. error_setg(&local_err,
  1186. "multifd: received SYNC device state packet");
  1187. break;
  1188. }
  1189. qemu_sem_post(&multifd_recv_state->sem_sync);
  1190. qemu_sem_wait(&p->sem_sync);
  1191. }
  1192. } else {
  1193. p->data->size = 0;
  1194. /*
  1195. * Order data->size update before clearing
  1196. * pending_job. Pairs with smp_mb_acquire() at
  1197. * multifd_recv().
  1198. */
  1199. qatomic_store_release(&p->pending_job, false);
  1200. }
  1201. }
  1202. if (local_err) {
  1203. multifd_recv_terminate_threads(local_err);
  1204. error_free(local_err);
  1205. }
  1206. rcu_unregister_thread();
  1207. trace_multifd_recv_thread_end(p->id, p->packets_recved);
  1208. return NULL;
  1209. }
  1210. int multifd_recv_setup(Error **errp)
  1211. {
  1212. int thread_count;
  1213. uint32_t page_count = multifd_ram_page_count();
  1214. bool use_packets = multifd_use_packets();
  1215. uint8_t i;
  1216. /*
  1217. * Return successfully if multiFD recv state is already initialised
  1218. * or multiFD is not enabled.
  1219. */
  1220. if (multifd_recv_state || !migrate_multifd()) {
  1221. return 0;
  1222. }
  1223. thread_count = migrate_multifd_channels();
  1224. multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
  1225. multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
  1226. multifd_recv_state->data = g_new0(MultiFDRecvData, 1);
  1227. multifd_recv_state->data->size = 0;
  1228. qatomic_set(&multifd_recv_state->count, 0);
  1229. qatomic_set(&multifd_recv_state->exiting, 0);
  1230. qemu_sem_init(&multifd_recv_state->sem_sync, 0);
  1231. multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()];
  1232. for (i = 0; i < thread_count; i++) {
  1233. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  1234. qemu_mutex_init(&p->mutex);
  1235. qemu_sem_init(&p->sem_sync, 0);
  1236. qemu_sem_init(&p->sem, 0);
  1237. p->pending_job = false;
  1238. p->id = i;
  1239. p->data = g_new0(MultiFDRecvData, 1);
  1240. p->data->size = 0;
  1241. if (use_packets) {
  1242. p->packet_len = sizeof(MultiFDPacket_t)
  1243. + sizeof(uint64_t) * page_count;
  1244. p->packet = g_malloc0(p->packet_len);
  1245. p->packet_dev_state = g_malloc0(sizeof(*p->packet_dev_state));
  1246. }
  1247. p->name = g_strdup_printf(MIGRATION_THREAD_DST_MULTIFD, i);
  1248. p->normal = g_new0(ram_addr_t, page_count);
  1249. p->zero = g_new0(ram_addr_t, page_count);
  1250. }
  1251. for (i = 0; i < thread_count; i++) {
  1252. MultiFDRecvParams *p = &multifd_recv_state->params[i];
  1253. int ret;
  1254. ret = multifd_recv_state->ops->recv_setup(p, errp);
  1255. if (ret) {
  1256. return ret;
  1257. }
  1258. }
  1259. return 0;
  1260. }
  1261. bool multifd_recv_all_channels_created(void)
  1262. {
  1263. int thread_count = migrate_multifd_channels();
  1264. if (!migrate_multifd()) {
  1265. return true;
  1266. }
  1267. if (!multifd_recv_state) {
  1268. /* Called before any connections created */
  1269. return false;
  1270. }
  1271. return thread_count == qatomic_read(&multifd_recv_state->count);
  1272. }
  1273. /*
  1274. * Try to receive all multifd channels to get ready for the migration.
  1275. * Sets @errp when failing to receive the current channel.
  1276. */
  1277. void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
  1278. {
  1279. MultiFDRecvParams *p;
  1280. Error *local_err = NULL;
  1281. bool use_packets = multifd_use_packets();
  1282. int id;
  1283. if (use_packets) {
  1284. id = multifd_recv_initial_packet(ioc, &local_err);
  1285. if (id < 0) {
  1286. multifd_recv_terminate_threads(local_err);
  1287. error_propagate_prepend(errp, local_err,
  1288. "failed to receive packet"
  1289. " via multifd channel %d: ",
  1290. qatomic_read(&multifd_recv_state->count));
  1291. return;
  1292. }
  1293. trace_multifd_recv_new_channel(id);
  1294. } else {
  1295. id = qatomic_read(&multifd_recv_state->count);
  1296. }
  1297. p = &multifd_recv_state->params[id];
  1298. if (p->c != NULL) {
  1299. error_setg(&local_err, "multifd: received id '%d' already setup'",
  1300. id);
  1301. multifd_recv_terminate_threads(local_err);
  1302. error_propagate(errp, local_err);
  1303. return;
  1304. }
  1305. p->c = ioc;
  1306. object_ref(OBJECT(ioc));
  1307. p->thread_created = true;
  1308. qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
  1309. QEMU_THREAD_JOINABLE);
  1310. qatomic_inc(&multifd_recv_state->count);
  1311. }