vmstate.c 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666
  1. /*
  2. * VMState interpreter
  3. *
  4. * Copyright (c) 2009-2017 Red Hat Inc
  5. *
  6. * Authors:
  7. * Juan Quintela <quintela@redhat.com>
  8. *
  9. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10. * See the COPYING file in the top-level directory.
  11. */
  12. #include "qemu/osdep.h"
  13. #include "migration.h"
  14. #include "migration/vmstate.h"
  15. #include "savevm.h"
  16. #include "qapi/error.h"
  17. #include "qobject/json-writer.h"
  18. #include "qemu-file.h"
  19. #include "qemu/bitops.h"
  20. #include "qemu/error-report.h"
  21. #include "trace.h"
  22. static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
  23. void *opaque, JSONWriter *vmdesc,
  24. Error **errp);
  25. static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
  26. void *opaque);
  27. /* Whether this field should exist for either save or load the VM? */
  28. static bool
  29. vmstate_field_exists(const VMStateDescription *vmsd, const VMStateField *field,
  30. void *opaque, int version_id)
  31. {
  32. bool result;
  33. if (field->field_exists) {
  34. /* If there's the function checker, that's the solo truth */
  35. result = field->field_exists(opaque, version_id);
  36. trace_vmstate_field_exists(vmsd->name, field->name, field->version_id,
  37. version_id, result);
  38. } else {
  39. /*
  40. * Otherwise, we only save/load if field version is same or older.
  41. * For example, when loading from an old binary with old version,
  42. * we ignore new fields with newer version_ids.
  43. */
  44. result = field->version_id <= version_id;
  45. }
  46. return result;
  47. }
  48. /*
  49. * Create a fake nullptr field when there's a NULL pointer detected in the
  50. * array of a VMS_ARRAY_OF_POINTER VMSD field. It's needed because we
  51. * can't dereference the NULL pointer.
  52. */
  53. static const VMStateField *
  54. vmsd_create_fake_nullptr_field(const VMStateField *field)
  55. {
  56. VMStateField *fake = g_new0(VMStateField, 1);
  57. /* It can only happen on an array of pointers! */
  58. assert(field->flags & VMS_ARRAY_OF_POINTER);
  59. /* Some of fake's properties should match the original's */
  60. fake->name = field->name;
  61. fake->version_id = field->version_id;
  62. /* Do not need "field_exists" check as it always exists (which is null) */
  63. fake->field_exists = NULL;
  64. /* See vmstate_info_nullptr - use 1 byte to represent nullptr */
  65. fake->size = 1;
  66. fake->info = &vmstate_info_nullptr;
  67. fake->flags = VMS_SINGLE;
  68. /* All the rest fields shouldn't matter.. */
  69. return (const VMStateField *)fake;
  70. }
  71. static int vmstate_n_elems(void *opaque, const VMStateField *field)
  72. {
  73. int n_elems = 1;
  74. if (field->flags & VMS_ARRAY) {
  75. n_elems = field->num;
  76. } else if (field->flags & VMS_VARRAY_INT32) {
  77. n_elems = *(int32_t *)(opaque + field->num_offset);
  78. } else if (field->flags & VMS_VARRAY_UINT32) {
  79. n_elems = *(uint32_t *)(opaque + field->num_offset);
  80. } else if (field->flags & VMS_VARRAY_UINT16) {
  81. n_elems = *(uint16_t *)(opaque + field->num_offset);
  82. } else if (field->flags & VMS_VARRAY_UINT8) {
  83. n_elems = *(uint8_t *)(opaque + field->num_offset);
  84. }
  85. if (field->flags & VMS_MULTIPLY_ELEMENTS) {
  86. n_elems *= field->num;
  87. }
  88. trace_vmstate_n_elems(field->name, n_elems);
  89. return n_elems;
  90. }
  91. static int vmstate_size(void *opaque, const VMStateField *field)
  92. {
  93. int size = field->size;
  94. if (field->flags & VMS_VBUFFER) {
  95. size = *(int32_t *)(opaque + field->size_offset);
  96. if (field->flags & VMS_MULTIPLY) {
  97. size *= field->size;
  98. }
  99. }
  100. return size;
  101. }
  102. static void vmstate_handle_alloc(void *ptr, const VMStateField *field,
  103. void *opaque)
  104. {
  105. if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) {
  106. gsize size = vmstate_size(opaque, field);
  107. size *= vmstate_n_elems(opaque, field);
  108. if (size) {
  109. *(void **)ptr = g_malloc(size);
  110. }
  111. }
  112. }
  113. int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
  114. void *opaque, int version_id)
  115. {
  116. const VMStateField *field = vmsd->fields;
  117. int ret = 0;
  118. trace_vmstate_load_state(vmsd->name, version_id);
  119. if (version_id > vmsd->version_id) {
  120. error_report("%s: incoming version_id %d is too new "
  121. "for local version_id %d",
  122. vmsd->name, version_id, vmsd->version_id);
  123. trace_vmstate_load_state_end(vmsd->name, "too new", -EINVAL);
  124. return -EINVAL;
  125. }
  126. if (version_id < vmsd->minimum_version_id) {
  127. error_report("%s: incoming version_id %d is too old "
  128. "for local minimum version_id %d",
  129. vmsd->name, version_id, vmsd->minimum_version_id);
  130. trace_vmstate_load_state_end(vmsd->name, "too old", -EINVAL);
  131. return -EINVAL;
  132. }
  133. if (vmsd->pre_load) {
  134. ret = vmsd->pre_load(opaque);
  135. if (ret) {
  136. return ret;
  137. }
  138. }
  139. while (field->name) {
  140. bool exists = vmstate_field_exists(vmsd, field, opaque, version_id);
  141. trace_vmstate_load_state_field(vmsd->name, field->name, exists);
  142. if (exists) {
  143. void *first_elem = opaque + field->offset;
  144. int i, n_elems = vmstate_n_elems(opaque, field);
  145. int size = vmstate_size(opaque, field);
  146. vmstate_handle_alloc(first_elem, field, opaque);
  147. if (field->flags & VMS_POINTER) {
  148. first_elem = *(void **)first_elem;
  149. assert(first_elem || !n_elems || !size);
  150. }
  151. for (i = 0; i < n_elems; i++) {
  152. void *curr_elem = first_elem + size * i;
  153. const VMStateField *inner_field;
  154. if (field->flags & VMS_ARRAY_OF_POINTER) {
  155. curr_elem = *(void **)curr_elem;
  156. }
  157. if (!curr_elem && size) {
  158. /*
  159. * If null pointer found (which should only happen in
  160. * an array of pointers), use null placeholder and do
  161. * not follow.
  162. */
  163. inner_field = vmsd_create_fake_nullptr_field(field);
  164. } else {
  165. inner_field = field;
  166. }
  167. if (inner_field->flags & VMS_STRUCT) {
  168. ret = vmstate_load_state(f, inner_field->vmsd, curr_elem,
  169. inner_field->vmsd->version_id);
  170. } else if (inner_field->flags & VMS_VSTRUCT) {
  171. ret = vmstate_load_state(f, inner_field->vmsd, curr_elem,
  172. inner_field->struct_version_id);
  173. } else {
  174. ret = inner_field->info->get(f, curr_elem, size,
  175. inner_field);
  176. }
  177. /* If we used a fake temp field.. free it now */
  178. if (inner_field != field) {
  179. g_clear_pointer((gpointer *)&inner_field, g_free);
  180. }
  181. if (ret >= 0) {
  182. ret = qemu_file_get_error(f);
  183. }
  184. if (ret < 0) {
  185. qemu_file_set_error(f, ret);
  186. error_report("Failed to load %s:%s", vmsd->name,
  187. field->name);
  188. trace_vmstate_load_field_error(field->name, ret);
  189. return ret;
  190. }
  191. }
  192. } else if (field->flags & VMS_MUST_EXIST) {
  193. error_report("Input validation failed: %s/%s",
  194. vmsd->name, field->name);
  195. return -1;
  196. }
  197. field++;
  198. }
  199. assert(field->flags == VMS_END);
  200. ret = vmstate_subsection_load(f, vmsd, opaque);
  201. if (ret != 0) {
  202. qemu_file_set_error(f, ret);
  203. return ret;
  204. }
  205. if (vmsd->post_load) {
  206. ret = vmsd->post_load(opaque, version_id);
  207. }
  208. trace_vmstate_load_state_end(vmsd->name, "end", ret);
  209. return ret;
  210. }
  211. static int vmfield_name_num(const VMStateField *start,
  212. const VMStateField *search)
  213. {
  214. const VMStateField *field;
  215. int found = 0;
  216. for (field = start; field->name; field++) {
  217. if (!strcmp(field->name, search->name)) {
  218. if (field == search) {
  219. return found;
  220. }
  221. found++;
  222. }
  223. }
  224. return -1;
  225. }
  226. static bool vmfield_name_is_unique(const VMStateField *start,
  227. const VMStateField *search)
  228. {
  229. const VMStateField *field;
  230. int found = 0;
  231. for (field = start; field->name; field++) {
  232. if (!strcmp(field->name, search->name)) {
  233. found++;
  234. /* name found more than once, so it's not unique */
  235. if (found > 1) {
  236. return false;
  237. }
  238. }
  239. }
  240. return true;
  241. }
  242. static const char *vmfield_get_type_name(const VMStateField *field)
  243. {
  244. const char *type = "unknown";
  245. if (field->flags & VMS_STRUCT) {
  246. type = "struct";
  247. } else if (field->flags & VMS_VSTRUCT) {
  248. type = "vstruct";
  249. } else if (field->info->name) {
  250. type = field->info->name;
  251. }
  252. return type;
  253. }
  254. static bool vmsd_can_compress(const VMStateField *field)
  255. {
  256. if (field->field_exists) {
  257. /* Dynamically existing fields mess up compression */
  258. return false;
  259. }
  260. if (field->flags & VMS_STRUCT) {
  261. const VMStateField *sfield = field->vmsd->fields;
  262. while (sfield->name) {
  263. if (!vmsd_can_compress(sfield)) {
  264. /* Child elements can't compress, so can't we */
  265. return false;
  266. }
  267. sfield++;
  268. }
  269. if (field->vmsd->subsections) {
  270. /* Subsections may come and go, better don't compress */
  271. return false;
  272. }
  273. }
  274. return true;
  275. }
  276. static void vmsd_desc_field_start(const VMStateDescription *vmsd,
  277. JSONWriter *vmdesc,
  278. const VMStateField *field, int i, int max)
  279. {
  280. char *name, *old_name;
  281. bool is_array = max > 1;
  282. bool can_compress = vmsd_can_compress(field);
  283. if (!vmdesc) {
  284. return;
  285. }
  286. name = g_strdup(field->name);
  287. /* Field name is not unique, need to make it unique */
  288. if (!vmfield_name_is_unique(vmsd->fields, field)) {
  289. int num = vmfield_name_num(vmsd->fields, field);
  290. old_name = name;
  291. name = g_strdup_printf("%s[%d]", name, num);
  292. g_free(old_name);
  293. }
  294. json_writer_start_object(vmdesc, NULL);
  295. json_writer_str(vmdesc, "name", name);
  296. if (is_array) {
  297. if (can_compress) {
  298. json_writer_int64(vmdesc, "array_len", max);
  299. } else {
  300. json_writer_int64(vmdesc, "index", i);
  301. }
  302. }
  303. json_writer_str(vmdesc, "type", vmfield_get_type_name(field));
  304. if (field->flags & VMS_STRUCT) {
  305. json_writer_start_object(vmdesc, "struct");
  306. }
  307. g_free(name);
  308. }
  309. static void vmsd_desc_field_end(const VMStateDescription *vmsd,
  310. JSONWriter *vmdesc,
  311. const VMStateField *field, size_t size)
  312. {
  313. if (!vmdesc) {
  314. return;
  315. }
  316. if (field->flags & VMS_STRUCT) {
  317. /* We printed a struct in between, close its child object */
  318. json_writer_end_object(vmdesc);
  319. }
  320. json_writer_int64(vmdesc, "size", size);
  321. json_writer_end_object(vmdesc);
  322. }
  323. bool vmstate_section_needed(const VMStateDescription *vmsd, void *opaque)
  324. {
  325. if (vmsd->needed && !vmsd->needed(opaque)) {
  326. /* optional section not needed */
  327. return false;
  328. }
  329. return true;
  330. }
  331. int vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
  332. void *opaque, JSONWriter *vmdesc_id)
  333. {
  334. return vmstate_save_state_v(f, vmsd, opaque, vmdesc_id, vmsd->version_id, NULL);
  335. }
  336. int vmstate_save_state_with_err(QEMUFile *f, const VMStateDescription *vmsd,
  337. void *opaque, JSONWriter *vmdesc_id, Error **errp)
  338. {
  339. return vmstate_save_state_v(f, vmsd, opaque, vmdesc_id, vmsd->version_id, errp);
  340. }
  341. int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd,
  342. void *opaque, JSONWriter *vmdesc, int version_id, Error **errp)
  343. {
  344. int ret = 0;
  345. const VMStateField *field = vmsd->fields;
  346. trace_vmstate_save_state_top(vmsd->name);
  347. if (vmsd->pre_save) {
  348. ret = vmsd->pre_save(opaque);
  349. trace_vmstate_save_state_pre_save_res(vmsd->name, ret);
  350. if (ret) {
  351. error_setg(errp, "pre-save failed: %s", vmsd->name);
  352. return ret;
  353. }
  354. }
  355. if (vmdesc) {
  356. json_writer_str(vmdesc, "vmsd_name", vmsd->name);
  357. json_writer_int64(vmdesc, "version", version_id);
  358. json_writer_start_array(vmdesc, "fields");
  359. }
  360. while (field->name) {
  361. if (vmstate_field_exists(vmsd, field, opaque, version_id)) {
  362. void *first_elem = opaque + field->offset;
  363. int i, n_elems = vmstate_n_elems(opaque, field);
  364. int size = vmstate_size(opaque, field);
  365. uint64_t old_offset, written_bytes;
  366. JSONWriter *vmdesc_loop = vmdesc;
  367. bool is_prev_null = false;
  368. trace_vmstate_save_state_loop(vmsd->name, field->name, n_elems);
  369. if (field->flags & VMS_POINTER) {
  370. first_elem = *(void **)first_elem;
  371. assert(first_elem || !n_elems || !size);
  372. }
  373. for (i = 0; i < n_elems; i++) {
  374. void *curr_elem = first_elem + size * i;
  375. const VMStateField *inner_field;
  376. bool is_null;
  377. int max_elems = n_elems - i;
  378. old_offset = qemu_file_transferred(f);
  379. if (field->flags & VMS_ARRAY_OF_POINTER) {
  380. assert(curr_elem);
  381. curr_elem = *(void **)curr_elem;
  382. }
  383. if (!curr_elem && size) {
  384. /*
  385. * If null pointer found (which should only happen in
  386. * an array of pointers), use null placeholder and do
  387. * not follow.
  388. */
  389. inner_field = vmsd_create_fake_nullptr_field(field);
  390. is_null = true;
  391. } else {
  392. inner_field = field;
  393. is_null = false;
  394. }
  395. /*
  396. * This logic only matters when dumping VM Desc.
  397. *
  398. * Due to the fake nullptr handling above, if there's mixed
  399. * null/non-null data, it doesn't make sense to emit a
  400. * compressed array representation spanning the entire array
  401. * because the field types will be different (e.g. struct
  402. * vs. nullptr). Search ahead for the next null/non-null element
  403. * and start a new compressed array if found.
  404. */
  405. if (vmdesc && (field->flags & VMS_ARRAY_OF_POINTER) &&
  406. is_null != is_prev_null) {
  407. is_prev_null = is_null;
  408. vmdesc_loop = vmdesc;
  409. for (int j = i + 1; j < n_elems; j++) {
  410. void *elem = *(void **)(first_elem + size * j);
  411. bool elem_is_null = !elem && size;
  412. if (is_null != elem_is_null) {
  413. max_elems = j - i;
  414. break;
  415. }
  416. }
  417. }
  418. vmsd_desc_field_start(vmsd, vmdesc_loop, inner_field,
  419. i, max_elems);
  420. if (inner_field->flags & VMS_STRUCT) {
  421. ret = vmstate_save_state(f, inner_field->vmsd,
  422. curr_elem, vmdesc_loop);
  423. } else if (inner_field->flags & VMS_VSTRUCT) {
  424. ret = vmstate_save_state_v(f, inner_field->vmsd,
  425. curr_elem, vmdesc_loop,
  426. inner_field->struct_version_id,
  427. errp);
  428. } else {
  429. ret = inner_field->info->put(f, curr_elem, size,
  430. inner_field, vmdesc_loop);
  431. }
  432. written_bytes = qemu_file_transferred(f) - old_offset;
  433. vmsd_desc_field_end(vmsd, vmdesc_loop, inner_field,
  434. written_bytes);
  435. /* If we used a fake temp field.. free it now */
  436. if (is_null) {
  437. g_clear_pointer((gpointer *)&inner_field, g_free);
  438. }
  439. if (ret) {
  440. error_setg(errp, "Save of field %s/%s failed",
  441. vmsd->name, field->name);
  442. if (vmsd->post_save) {
  443. vmsd->post_save(opaque);
  444. }
  445. return ret;
  446. }
  447. /* Compressed arrays only care about the first element */
  448. if (vmdesc_loop && vmsd_can_compress(field)) {
  449. vmdesc_loop = NULL;
  450. }
  451. }
  452. } else {
  453. if (field->flags & VMS_MUST_EXIST) {
  454. error_report("Output state validation failed: %s/%s",
  455. vmsd->name, field->name);
  456. assert(!(field->flags & VMS_MUST_EXIST));
  457. }
  458. }
  459. field++;
  460. }
  461. assert(field->flags == VMS_END);
  462. if (vmdesc) {
  463. json_writer_end_array(vmdesc);
  464. }
  465. ret = vmstate_subsection_save(f, vmsd, opaque, vmdesc, errp);
  466. if (vmsd->post_save) {
  467. int ps_ret = vmsd->post_save(opaque);
  468. if (!ret && ps_ret) {
  469. ret = ps_ret;
  470. error_setg(errp, "post-save failed: %s", vmsd->name);
  471. }
  472. }
  473. return ret;
  474. }
  475. static const VMStateDescription *
  476. vmstate_get_subsection(const VMStateDescription * const *sub,
  477. const char *idstr)
  478. {
  479. if (sub) {
  480. for (const VMStateDescription *s = *sub; s ; s = *++sub) {
  481. if (strcmp(idstr, s->name) == 0) {
  482. return s;
  483. }
  484. }
  485. }
  486. return NULL;
  487. }
  488. static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
  489. void *opaque)
  490. {
  491. trace_vmstate_subsection_load(vmsd->name);
  492. while (qemu_peek_byte(f, 0) == QEMU_VM_SUBSECTION) {
  493. char idstr[256], *idstr_ret;
  494. int ret;
  495. uint8_t version_id, len, size;
  496. const VMStateDescription *sub_vmsd;
  497. len = qemu_peek_byte(f, 1);
  498. if (len < strlen(vmsd->name) + 1) {
  499. /* subsection name has to be "section_name/a" */
  500. trace_vmstate_subsection_load_bad(vmsd->name, "(short)", "");
  501. return 0;
  502. }
  503. size = qemu_peek_buffer(f, (uint8_t **)&idstr_ret, len, 2);
  504. if (size != len) {
  505. trace_vmstate_subsection_load_bad(vmsd->name, "(peek fail)", "");
  506. return 0;
  507. }
  508. memcpy(idstr, idstr_ret, size);
  509. idstr[size] = 0;
  510. if (strncmp(vmsd->name, idstr, strlen(vmsd->name)) != 0) {
  511. trace_vmstate_subsection_load_bad(vmsd->name, idstr, "(prefix)");
  512. /* it doesn't have a valid subsection name */
  513. return 0;
  514. }
  515. sub_vmsd = vmstate_get_subsection(vmsd->subsections, idstr);
  516. if (sub_vmsd == NULL) {
  517. trace_vmstate_subsection_load_bad(vmsd->name, idstr, "(lookup)");
  518. return -ENOENT;
  519. }
  520. qemu_file_skip(f, 1); /* subsection */
  521. qemu_file_skip(f, 1); /* len */
  522. qemu_file_skip(f, len); /* idstr */
  523. version_id = qemu_get_be32(f);
  524. ret = vmstate_load_state(f, sub_vmsd, opaque, version_id);
  525. if (ret) {
  526. trace_vmstate_subsection_load_bad(vmsd->name, idstr, "(child)");
  527. return ret;
  528. }
  529. }
  530. trace_vmstate_subsection_load_good(vmsd->name);
  531. return 0;
  532. }
  533. static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
  534. void *opaque, JSONWriter *vmdesc,
  535. Error **errp)
  536. {
  537. const VMStateDescription * const *sub = vmsd->subsections;
  538. bool vmdesc_has_subsections = false;
  539. int ret = 0;
  540. trace_vmstate_subsection_save_top(vmsd->name);
  541. while (sub && *sub) {
  542. if (vmstate_section_needed(*sub, opaque)) {
  543. const VMStateDescription *vmsdsub = *sub;
  544. uint8_t len;
  545. trace_vmstate_subsection_save_loop(vmsd->name, vmsdsub->name);
  546. if (vmdesc) {
  547. /* Only create subsection array when we have any */
  548. if (!vmdesc_has_subsections) {
  549. json_writer_start_array(vmdesc, "subsections");
  550. vmdesc_has_subsections = true;
  551. }
  552. json_writer_start_object(vmdesc, NULL);
  553. }
  554. qemu_put_byte(f, QEMU_VM_SUBSECTION);
  555. len = strlen(vmsdsub->name);
  556. qemu_put_byte(f, len);
  557. qemu_put_buffer(f, (uint8_t *)vmsdsub->name, len);
  558. qemu_put_be32(f, vmsdsub->version_id);
  559. ret = vmstate_save_state_with_err(f, vmsdsub, opaque, vmdesc, errp);
  560. if (ret) {
  561. return ret;
  562. }
  563. if (vmdesc) {
  564. json_writer_end_object(vmdesc);
  565. }
  566. }
  567. sub++;
  568. }
  569. if (vmdesc_has_subsections) {
  570. json_writer_end_array(vmdesc);
  571. }
  572. return ret;
  573. }