dump.c

/*
 * QEMU dump
 *
 * Copyright Fujitsu, Corp. 2011, 2012
 *
 * Authors:
 *     Wen Congyang <wency@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/cutils.h"
#include "elf.h"
#include "cpu.h"
#include "exec/cpu-all.h"
#include "exec/hwaddr.h"
#include "monitor/monitor.h"
#include "sysemu/kvm.h"
#include "sysemu/dump.h"
#include "sysemu/sysemu.h"
#include "sysemu/memory_mapping.h"
#include "sysemu/cpus.h"
#include "qapi/qmp/qerror.h"
#include "qmp-commands.h"
#include "qapi-event.h"

#include <zlib.h>
#ifdef CONFIG_LZO
#include <lzo/lzo1x.h>
#endif
#ifdef CONFIG_SNAPPY
#include <snappy-c.h>
#endif
#ifndef ELF_MACHINE_UNAME
#define ELF_MACHINE_UNAME "Unknown"
#endif

uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le16(val);
    } else {
        val = cpu_to_be16(val);
    }

    return val;
}

uint32_t cpu_to_dump32(DumpState *s, uint32_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le32(val);
    } else {
        val = cpu_to_be32(val);
    }

    return val;
}

uint64_t cpu_to_dump64(DumpState *s, uint64_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
        val = cpu_to_le64(val);
    } else {
        val = cpu_to_be64(val);
    }

    return val;
}
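
/*
 * Note on the cpu_to_dump* helpers above: every multi-byte field written into
 * the vmcore or kdump headers goes through them, so the output file ends up
 * in the guest's byte order (dump_info.d_endian, filled in later by
 * cpu_get_dump_info() during dump_init()) rather than the host's.
 */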

static int dump_cleanup(DumpState *s)
{
    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    close(s->fd);
    if (s->resume) {
        vm_start();
    }

    return 0;
}

static int fd_write_vmcore(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;
    size_t written_size;

    written_size = qemu_write_full(s->fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}

static void write_elf64_header(DumpState *s, Error **errp)
{
    Elf64_Ehdr elf_header;
    int ret;

    memset(&elf_header, 0, sizeof(Elf64_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS64;
    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
    elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr));
    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr));
    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
    if (s->have_section) {
        uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_to_dump64(s, shoff);
        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr));
        elf_header.e_shnum = cpu_to_dump16(s, 1);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write elf header");
    }
}

static void write_elf32_header(DumpState *s, Error **errp)
{
    Elf32_Ehdr elf_header;
    int ret;

    memset(&elf_header, 0, sizeof(Elf32_Ehdr));
    memcpy(&elf_header, ELFMAG, SELFMAG);
    elf_header.e_ident[EI_CLASS] = ELFCLASS32;
    elf_header.e_ident[EI_DATA] = s->dump_info.d_endian;
    elf_header.e_ident[EI_VERSION] = EV_CURRENT;
    elf_header.e_type = cpu_to_dump16(s, ET_CORE);
    elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine);
    elf_header.e_version = cpu_to_dump32(s, EV_CURRENT);
    elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header));
    elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr));
    elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr));
    elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num);
    if (s->have_section) {
        uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info;

        elf_header.e_shoff = cpu_to_dump32(s, shoff);
        elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr));
        elf_header.e_shnum = cpu_to_dump16(s, 1);
    }

    ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write elf header");
    }
}

static void write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf64_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
    phdr.p_offset = cpu_to_dump64(s, offset);
    phdr.p_paddr = cpu_to_dump64(s, memory_mapping->phys_addr);
    phdr.p_filesz = cpu_to_dump64(s, filesz);
    phdr.p_memsz = cpu_to_dump64(s, memory_mapping->length);
    phdr.p_vaddr = cpu_to_dump64(s, memory_mapping->virt_addr);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write program header table");
    }
}

static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
                             int phdr_index, hwaddr offset,
                             hwaddr filesz, Error **errp)
{
    Elf32_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_LOAD);
    phdr.p_offset = cpu_to_dump32(s, offset);
    phdr.p_paddr = cpu_to_dump32(s, memory_mapping->phys_addr);
    phdr.p_filesz = cpu_to_dump32(s, filesz);
    phdr.p_memsz = cpu_to_dump32(s, memory_mapping->length);
    phdr.p_vaddr = cpu_to_dump32(s, memory_mapping->virt_addr);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write program header table");
    }
}

static void write_elf64_note(DumpState *s, Error **errp)
{
    Elf64_Phdr phdr;
    hwaddr begin = s->memory_offset - s->note_size;
    int ret;

    memset(&phdr, 0, sizeof(Elf64_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
    phdr.p_offset = cpu_to_dump64(s, begin);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_to_dump64(s, s->note_size);
    phdr.p_memsz = cpu_to_dump64(s, s->note_size);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write program header table");
    }
}

static inline int cpu_index(CPUState *cpu)
{
    return cpu->cpu_index + 1;
}

static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf64_note(f, cpu, id, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write elf notes");
            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf64_qemunote(f, cpu, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write CPU status");
            return;
        }
    }
}

static void write_elf32_note(DumpState *s, Error **errp)
{
    hwaddr begin = s->memory_offset - s->note_size;
    Elf32_Phdr phdr;
    int ret;

    memset(&phdr, 0, sizeof(Elf32_Phdr));
    phdr.p_type = cpu_to_dump32(s, PT_NOTE);
    phdr.p_offset = cpu_to_dump32(s, begin);
    phdr.p_paddr = 0;
    phdr.p_filesz = cpu_to_dump32(s, s->note_size);
    phdr.p_memsz = cpu_to_dump32(s, s->note_size);
    phdr.p_vaddr = 0;

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write program header table");
    }
}

static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
    CPUState *cpu;
    int ret;
    int id;

    CPU_FOREACH(cpu) {
        id = cpu_index(cpu);
        ret = cpu_write_elf32_note(f, cpu, id, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write elf notes");
            return;
        }
    }

    CPU_FOREACH(cpu) {
        ret = cpu_write_elf32_qemunote(f, cpu, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write CPU status");
            return;
        }
    }
}

static void write_elf_section(DumpState *s, int type, Error **errp)
{
    Elf32_Shdr shdr32;
    Elf64_Shdr shdr64;
    int shdr_size;
    void *shdr;
    int ret;

    if (type == 0) {
        shdr_size = sizeof(Elf32_Shdr);
        memset(&shdr32, 0, shdr_size);
        shdr32.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr32;
    } else {
        shdr_size = sizeof(Elf64_Shdr);
        memset(&shdr64, 0, shdr_size);
        shdr64.sh_info = cpu_to_dump32(s, s->sh_info);
        shdr = &shdr64;
    }

    /* write the section header itself, not the address of the local pointer */
    ret = fd_write_vmcore(shdr, shdr_size, s);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write section header table");
    }
}
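
/*
 * Why a single, otherwise empty section header: when a dump needs more
 * program headers than fit in the 16-bit e_phnum field, dump_init() sets
 * e_phnum to PN_XNUM and stores the real count in the sh_info field of
 * section header 0, which is the standard ELF escape hatch for large program
 * header tables.  write_elf_section() above emits exactly that one
 * placeholder header.
 */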

static void write_data(DumpState *s, void *buf, int length, Error **errp)
{
    int ret;

    ret = fd_write_vmcore(buf, length, s);
    if (ret < 0) {
        error_setg(errp, "dump: failed to save memory");
    } else {
        s->written_size += length;
    }
}

/* write the memory to vmcore. 1 page per I/O. */
static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
                         int64_t size, Error **errp)
{
    int64_t i;
    Error *local_err = NULL;

    for (i = 0; i < size / s->dump_info.page_size; i++) {
        write_data(s, block->host_addr + start + i * s->dump_info.page_size,
                   s->dump_info.page_size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if ((size % s->dump_info.page_size) != 0) {
        write_data(s, block->host_addr + start + i * s->dump_info.page_size,
                   size % s->dump_info.page_size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

/* get the memory's offset and size in the vmcore */
static void get_offset_range(hwaddr phys_addr,
                             ram_addr_t mapping_length,
                             DumpState *s,
                             hwaddr *p_offset,
                             hwaddr *p_filesz)
{
    GuestPhysBlock *block;
    hwaddr offset = s->memory_offset;
    int64_t size_in_block, start;

    /* When the memory is not stored into vmcore, offset will be -1 */
    *p_offset = -1;
    *p_filesz = 0;

    if (s->has_filter) {
        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
            return;
        }
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin <= block->target_start) {
                start = block->target_start;
            } else {
                start = s->begin;
            }

            size_in_block = block->target_end - start;
            if (s->begin + s->length < block->target_end) {
                size_in_block -= block->target_end - (s->begin + s->length);
            }
        } else {
            start = block->target_start;
            size_in_block = block->target_end - block->target_start;
        }

        if (phys_addr >= start && phys_addr < start + size_in_block) {
            *p_offset = phys_addr - start + offset;

            /* The offset range mapped from the vmcore file must not spill over
             * the GuestPhysBlock, clamp it. The rest of the mapping will be
             * zero-filled in memory at load time; see
             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
             */
            *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
                        mapping_length :
                        size_in_block - (phys_addr - start);
            return;
        }

        offset += size_in_block;
    }
}
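
/*
 * In short: the PT_LOAD program headers describe guest-physical ranges, but
 * the payload bytes are laid out in the file in GuestPhysBlock order starting
 * at s->memory_offset.  get_offset_range() walks the blocks (honouring the
 * optional begin/length filter) to translate a physical address into that
 * file offset, and clamps p_filesz so a mapping never claims bytes beyond its
 * block; p_memsz may still be larger, and the loader zero-fills the rest.
 */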

static void write_elf_loads(DumpState *s, Error **errp)
{
    hwaddr offset, filesz;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
    uint32_t max_index;
    Error *local_err = NULL;

    if (s->have_section) {
        max_index = s->sh_info;
    } else {
        max_index = s->phdr_num;
    }

    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
        get_offset_range(memory_mapping->phys_addr,
                         memory_mapping->length,
                         s, &offset, &filesz);
        if (s->dump_info.d_class == ELFCLASS64) {
            write_elf64_load(s, memory_mapping, phdr_index++, offset,
                             filesz, &local_err);
        } else {
            write_elf32_load(s, memory_mapping, phdr_index++, offset,
                             filesz, &local_err);
        }

        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        if (phdr_index >= max_index) {
            break;
        }
    }
}

/* write elf header, PT_NOTE and elf note to vmcore. */
static void dump_begin(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    /*
     * the vmcore's format is:
     *   --------------
     *   |  elf header |
     *   --------------
     *   |  PT_NOTE    |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  ......     |
     *   --------------
     *   |  PT_LOAD    |
     *   --------------
     *   |  sec_hdr    |
     *   --------------
     *   |  elf note   |
     *   --------------
     *   |  memory     |
     *   --------------
     *
     * we only know where the memory is saved after we write elf note into
     * vmcore.
     */

    /* write elf header to vmcore */
    if (s->dump_info.d_class == ELFCLASS64) {
        write_elf64_header(s, &local_err);
    } else {
        write_elf32_header(s, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        /* write PT_NOTE to vmcore */
        write_elf64_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write all PT_LOAD to vmcore */
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
            write_elf_section(s, 1, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
            }
        }

        /* write notes to vmcore */
        write_elf64_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    } else {
        /* write PT_NOTE to vmcore */
        write_elf32_note(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write all PT_LOAD to vmcore */
        write_elf_loads(s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }

        /* write section to vmcore */
        if (s->have_section) {
            write_elf_section(s, 0, &local_err);
            if (local_err) {
                error_propagate(errp, local_err);
                return;
            }
        }

        /* write notes to vmcore */
        write_elf32_notes(fd_write_vmcore, s, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

static int get_next_block(DumpState *s, GuestPhysBlock *block)
{
    while (1) {
        block = QTAILQ_NEXT(block, next);
        if (!block) {
            /* no more block */
            return 1;
        }

        s->start = 0;
        s->next_block = block;
        if (s->has_filter) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin > block->target_start) {
                s->start = s->begin - block->target_start;
            }
        }

        return 0;
    }
}

/* write all memory to vmcore */
static void dump_iterate(DumpState *s, Error **errp)
{
    GuestPhysBlock *block;
    int64_t size;
    Error *local_err = NULL;

    do {
        block = s->next_block;

        size = block->target_end - block->target_start;
        if (s->has_filter) {
            size -= s->start;
            if (s->begin + s->length < block->target_end) {
                size -= block->target_end - (s->begin + s->length);
            }
        }
        write_memory(s, block, s->start, size, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    } while (!get_next_block(s, block));
}

static void create_vmcore(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    dump_begin(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    dump_iterate(s, errp);
}

static int write_start_flat_header(int fd)
{
    MakedumpfileHeader *mh;
    size_t written_size;
    int ret = 0;

    QEMU_BUILD_BUG_ON(sizeof *mh > MAX_SIZE_MDF_HEADER);
    mh = g_malloc0(MAX_SIZE_MDF_HEADER);

    memcpy(mh->signature, MAKEDUMPFILE_SIGNATURE,
           MIN(sizeof mh->signature, sizeof MAKEDUMPFILE_SIGNATURE));

    mh->type = cpu_to_be64(TYPE_FLAT_HEADER);
    mh->version = cpu_to_be64(VERSION_FLAT_HEADER);

    written_size = qemu_write_full(fd, mh, MAX_SIZE_MDF_HEADER);
    if (written_size != MAX_SIZE_MDF_HEADER) {
        ret = -1;
    }

    g_free(mh);
    return ret;
}

static int write_end_flat_header(int fd)
{
    MakedumpfileDataHeader mdh;
    size_t written_size;

    mdh.offset = END_FLAG_FLAT_HEADER;
    mdh.buf_size = END_FLAG_FLAT_HEADER;

    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    return 0;
}

static int write_buffer(int fd, off_t offset, const void *buf, size_t size)
{
    size_t written_size;
    MakedumpfileDataHeader mdh;

    mdh.offset = cpu_to_be64(offset);
    mdh.buf_size = cpu_to_be64(size);

    written_size = qemu_write_full(fd, &mdh, sizeof(mdh));
    if (written_size != sizeof(mdh)) {
        return -1;
    }

    written_size = qemu_write_full(fd, buf, size);
    if (written_size != size) {
        return -1;
    }

    return 0;
}
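
/*
 * The three helpers above implement the makedumpfile "flattened" framing:
 * because the dump fd may be a pipe and cannot be seeked, every chunk is
 * emitted as a (big-endian) MakedumpfileDataHeader carrying the offset the
 * data would occupy in a regular kdump-compressed file plus its size,
 * followed by the payload itself; write_end_flat_header() closes the stream
 * with the END_FLAG_FLAT_HEADER marker.  A flattened dump is typically turned
 * back into a normal one with makedumpfile's reassembly mode (-R) before use.
 */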

static int buf_write_note(const void *buf, size_t size, void *opaque)
{
    DumpState *s = opaque;

    /* note_buf is not large enough */
    if (s->note_buf_offset + size > s->note_size) {
        return -1;
    }

    memcpy(s->note_buf + s->note_buf_offset, buf, size);

    s->note_buf_offset += size;

    return 0;
}

/* write common header, sub header and elf note to vmcore */
static void create_header32(DumpState *s, Error **errp)
{
    DiskDumpHeader32 *dh = NULL;
    KdumpSubHeader32 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
    Error *local_err = NULL;

    /* write common header, the version of the kdump-compressed format is 6 */
    size = sizeof(DiskDumpHeader32);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
    dh->header_version = cpu_to_dump32(s, 6);
    block_size = s->dump_info.page_size;
    dh->block_size = cpu_to_dump32(s, block_size);
    sub_hdr_size = sizeof(struct KdumpSubHeader32) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        error_setg(errp, "dump: failed to write disk dump header");
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader32);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump32(s, s->dump_info.phys_base);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump32(s, s->note_size);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        error_setg(errp, "dump: failed to write kdump sub header");
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    write_elf32_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        error_setg(errp, "dump: failed to write notes");
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

/* write common header, sub header and elf note to vmcore */
static void create_header64(DumpState *s, Error **errp)
{
    DiskDumpHeader64 *dh = NULL;
    KdumpSubHeader64 *kh = NULL;
    size_t size;
    uint32_t block_size;
    uint32_t sub_hdr_size;
    uint32_t bitmap_blocks;
    uint32_t status = 0;
    uint64_t offset_note;
    Error *local_err = NULL;

    /* write common header, the version of the kdump-compressed format is 6 */
    size = sizeof(DiskDumpHeader64);
    dh = g_malloc0(size);

    strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE));
    dh->header_version = cpu_to_dump32(s, 6);
    block_size = s->dump_info.page_size;
    dh->block_size = cpu_to_dump32(s, block_size);
    sub_hdr_size = sizeof(struct KdumpSubHeader64) + s->note_size;
    sub_hdr_size = DIV_ROUND_UP(sub_hdr_size, block_size);
    dh->sub_hdr_size = cpu_to_dump32(s, sub_hdr_size);
    /* dh->max_mapnr may be truncated, full 64bit is in kh.max_mapnr_64 */
    dh->max_mapnr = cpu_to_dump32(s, MIN(s->max_mapnr, UINT_MAX));
    dh->nr_cpus = cpu_to_dump32(s, s->nr_cpus);
    bitmap_blocks = DIV_ROUND_UP(s->len_dump_bitmap, block_size) * 2;
    dh->bitmap_blocks = cpu_to_dump32(s, bitmap_blocks);
    strncpy(dh->utsname.machine, ELF_MACHINE_UNAME, sizeof(dh->utsname.machine));

    if (s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) {
        status |= DUMP_DH_COMPRESSED_ZLIB;
    }
#ifdef CONFIG_LZO
    if (s->flag_compress & DUMP_DH_COMPRESSED_LZO) {
        status |= DUMP_DH_COMPRESSED_LZO;
    }
#endif
#ifdef CONFIG_SNAPPY
    if (s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) {
        status |= DUMP_DH_COMPRESSED_SNAPPY;
    }
#endif
    dh->status = cpu_to_dump32(s, status);

    if (write_buffer(s->fd, 0, dh, size) < 0) {
        error_setg(errp, "dump: failed to write disk dump header");
        goto out;
    }

    /* write sub header */
    size = sizeof(KdumpSubHeader64);
    kh = g_malloc0(size);

    /* 64bit max_mapnr_64 */
    kh->max_mapnr_64 = cpu_to_dump64(s, s->max_mapnr);
    kh->phys_base = cpu_to_dump64(s, s->dump_info.phys_base);
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump64(s, s->note_size);

    if (write_buffer(s->fd, DISKDUMP_HEADER_BLOCKS *
                     block_size, kh, size) < 0) {
        error_setg(errp, "dump: failed to write kdump sub header");
        goto out;
    }

    /* write note */
    s->note_buf = g_malloc0(s->note_size);
    s->note_buf_offset = 0;

    /* use s->note_buf to store notes temporarily */
    write_elf64_notes(buf_write_note, s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }

    if (write_buffer(s->fd, offset_note, s->note_buf,
                     s->note_size) < 0) {
        error_setg(errp, "dump: failed to write notes");
        goto out;
    }

    /* get offset of dump_bitmap */
    s->offset_dump_bitmap = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size) *
                             block_size;

    /* get offset of page */
    s->offset_page = (DISKDUMP_HEADER_BLOCKS + sub_hdr_size + bitmap_blocks) *
                     block_size;

out:
    g_free(dh);
    g_free(kh);
    g_free(s->note_buf);
}

static void write_dump_header(DumpState *s, Error **errp)
{
    Error *local_err = NULL;

    if (s->dump_info.d_class == ELFCLASS32) {
        create_header32(s, &local_err);
    } else {
        create_header64(s, &local_err);
    }
    error_propagate(errp, local_err);
}

static size_t dump_bitmap_get_bufsize(DumpState *s)
{
    return s->dump_info.page_size;
}

/*
 * Set dump_bitmap sequentially.  Bits before last_pfn are not allowed to be
 * rewritten, so if the first bit needs to be set, set both last_pfn and pfn
 * to 0.  set_dump_bitmap always leaves the most recently set bit un-synced;
 * setting (last_pfn + sizeof(buf) * 8) to 0 flushes the content of buf into
 * the vmcore, i.e. it synchronizes the un-synced bits into the vmcore.
 */
static int set_dump_bitmap(uint64_t last_pfn, uint64_t pfn, bool value,
                           uint8_t *buf, DumpState *s)
{
    off_t old_offset, new_offset;
    off_t offset_bitmap1, offset_bitmap2;
    uint32_t byte, bit;
    size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
    size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;

    /* should not set the previous place */
    assert(last_pfn <= pfn);

    /*
     * if the bit needed to be set is not cached in buf, flush the data in buf
     * to vmcore firstly.
     * making new_offset bigger than old_offset can also sync the remaining
     * data into the vmcore.
     */
    old_offset = bitmap_bufsize * (last_pfn / bits_per_buf);
    new_offset = bitmap_bufsize * (pfn / bits_per_buf);

    while (old_offset < new_offset) {
        /* calculate the offset and write dump_bitmap */
        offset_bitmap1 = s->offset_dump_bitmap + old_offset;
        if (write_buffer(s->fd, offset_bitmap1, buf,
                         bitmap_bufsize) < 0) {
            return -1;
        }

        /* dump level 1 is chosen, so 1st and 2nd bitmap are same */
        offset_bitmap2 = s->offset_dump_bitmap + s->len_dump_bitmap +
                         old_offset;
        if (write_buffer(s->fd, offset_bitmap2, buf,
                         bitmap_bufsize) < 0) {
            return -1;
        }

        memset(buf, 0, bitmap_bufsize);
        old_offset += bitmap_bufsize;
    }

    /* get the exact place of the bit in the buf, and set it */
    byte = (pfn % bits_per_buf) / CHAR_BIT;
    bit = (pfn % bits_per_buf) % CHAR_BIT;
    if (value) {
        buf[byte] |= 1u << bit;
    } else {
        buf[byte] &= ~(1u << bit);
    }

    return 0;
}
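
/*
 * Worked example (assuming a 4 KiB dump page size): bitmap_bufsize is 4096
 * bytes, so one buffer covers bits_per_buf = 4096 * 8 = 32768 page frames.
 * While pfn stays below 32768 the bits only accumulate in buf; the first call
 * with pfn >= 32768 makes new_offset (4096) exceed old_offset (0) and flushes
 * those 4096 bytes to both the 1st and 2nd bitmap regions before buf is
 * cleared and reused for the next 32768 frames.
 */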

static uint64_t dump_paddr_to_pfn(DumpState *s, uint64_t addr)
{
    int target_page_shift = ctz32(s->dump_info.page_size);

    return (addr >> target_page_shift) - ARCH_PFN_OFFSET;
}

static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn)
{
    int target_page_shift = ctz32(s->dump_info.page_size);

    return (pfn + ARCH_PFN_OFFSET) << target_page_shift;
}
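
/*
 * Example of the conversion above, assuming a 4 KiB page size and
 * ARCH_PFN_OFFSET == 0: ctz32(4096) == 12, so guest physical address
 * 0x12345000 maps to pfn 0x12345, and pfn 0x12345 maps back to 0x12345000.
 */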

/*
 * Examine every page and return the page frame number and the address of the
 * page.  bufptr can be NULL.  Note: the blocks here are supposed to reflect
 * guest-phys blocks, so block->target_start and block->target_end should be
 * integral multiples of the target page size.
 */
static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr,
                          uint8_t **bufptr, DumpState *s)
{
    GuestPhysBlock *block = *blockptr;
    hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1);
    uint8_t *buf;

    /* block == NULL means the start of the iteration */
    if (!block) {
        block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        *blockptr = block;
        assert((block->target_start & ~target_page_mask) == 0);
        assert((block->target_end & ~target_page_mask) == 0);
        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
        if (bufptr) {
            *bufptr = block->host_addr;
        }
        return true;
    }

    *pfnptr = *pfnptr + 1;
    addr = dump_pfn_to_paddr(s, *pfnptr);

    if ((addr >= block->target_start) &&
        (addr + s->dump_info.page_size <= block->target_end)) {
        buf = block->host_addr + (addr - block->target_start);
    } else {
        /* the next page is in the next block */
        block = QTAILQ_NEXT(block, next);
        *blockptr = block;
        if (!block) {
            return false;
        }
        assert((block->target_start & ~target_page_mask) == 0);
        assert((block->target_end & ~target_page_mask) == 0);
        *pfnptr = dump_paddr_to_pfn(s, block->target_start);
        buf = block->host_addr;
    }

    if (bufptr) {
        *bufptr = buf;
    }

    return true;
}
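
/*
 * Typical use of the iterator above (this is how both write_dump_bitmap()
 * and write_dump_pages() drive it): start with a NULL block pointer and keep
 * calling until it returns false, e.g.
 *
 *     GuestPhysBlock *block = NULL;
 *     uint64_t pfn;
 *     uint8_t *page;
 *
 *     while (get_next_page(&block, &pfn, &page, s)) {
 *         ... process one page of s->dump_info.page_size bytes at 'page' ...
 *     }
 */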

static void write_dump_bitmap(DumpState *s, Error **errp)
{
    int ret = 0;
    uint64_t last_pfn, pfn;
    void *dump_bitmap_buf;
    size_t num_dumpable;
    GuestPhysBlock *block_iter = NULL;
    size_t bitmap_bufsize = dump_bitmap_get_bufsize(s);
    size_t bits_per_buf = bitmap_bufsize * CHAR_BIT;

    /* dump_bitmap_buf is used to store dump_bitmap temporarily */
    dump_bitmap_buf = g_malloc0(bitmap_bufsize);

    num_dumpable = 0;
    last_pfn = 0;

    /*
     * examine memory page by page, and set the bit in dump_bitmap that
     * corresponds to each existing page.
     */
    while (get_next_page(&block_iter, &pfn, NULL, s)) {
        ret = set_dump_bitmap(last_pfn, pfn, true, dump_bitmap_buf, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to set dump_bitmap");
            goto out;
        }

        last_pfn = pfn;
        num_dumpable++;
    }

    /*
     * set_dump_bitmap will always leave the recently set bit un-sync. Here we
     * set the remaining bits from last_pfn to the end of the bitmap buffer to
     * 0. With those set, the un-sync bit will be synchronized into the vmcore.
     */
    if (num_dumpable > 0) {
        ret = set_dump_bitmap(last_pfn, last_pfn + bits_per_buf, false,
                              dump_bitmap_buf, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to sync dump_bitmap");
            goto out;
        }
    }

    /* number of dumpable pages that will be dumped later */
    s->num_dumpable = num_dumpable;

out:
    g_free(dump_bitmap_buf);
}

static void prepare_data_cache(DataCache *data_cache, DumpState *s,
                               off_t offset)
{
    data_cache->fd = s->fd;
    data_cache->data_size = 0;
    data_cache->buf_size = 4 * dump_bitmap_get_bufsize(s);
    data_cache->buf = g_malloc0(data_cache->buf_size);
    data_cache->offset = offset;
}

static int write_cache(DataCache *dc, const void *buf, size_t size,
                       bool flag_sync)
{
    /*
     * dc->buf_size should not be less than size, otherwise dc will never
     * have enough room for the data
     */
    assert(size <= dc->buf_size);

    /*
     * if flag_sync is set, synchronize the data in dc->buf into the vmcore.
     * otherwise check if there is enough room to cache the data in buf; if
     * not, write the data in dc->buf to dc->fd and reset dc->buf
     */
    if ((!flag_sync && dc->data_size + size > dc->buf_size) ||
        (flag_sync && dc->data_size > 0)) {
        if (write_buffer(dc->fd, dc->offset, dc->buf, dc->data_size) < 0) {
            return -1;
        }

        dc->offset += dc->data_size;
        dc->data_size = 0;
    }

    if (!flag_sync) {
        memcpy(dc->buf + dc->data_size, buf, size);
        dc->data_size += size;
    }

    return 0;
}
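
/*
 * In other words, a DataCache batches small writes: appends accumulate in
 * dc->buf (4 dump pages worth, per prepare_data_cache()) and are written out
 * with write_buffer() whenever the next append would overflow.  A final
 * write_cache(dc, NULL, 0, true) call flushes whatever is still buffered,
 * which is exactly what write_dump_pages() does for page_desc and page_data
 * before returning.
 */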

static void free_data_cache(DataCache *data_cache)
{
    g_free(data_cache->buf);
}

static size_t get_len_buf_out(size_t page_size, uint32_t flag_compress)
{
    switch (flag_compress) {
    case DUMP_DH_COMPRESSED_ZLIB:
        return compressBound(page_size);

    case DUMP_DH_COMPRESSED_LZO:
        /*
         * LZO will expand incompressible data by a little amount. Please check
         * the following URL to see the expansion calculation:
         * http://www.oberhumer.com/opensource/lzo/lzofaq.php
         */
        return page_size + page_size / 16 + 64 + 3;

#ifdef CONFIG_SNAPPY
    case DUMP_DH_COMPRESSED_SNAPPY:
        return snappy_max_compressed_length(page_size);
#endif
    }
    return 0;
}
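
/*
 * get_len_buf_out() returns the worst-case size a compressed page can reach,
 * because none of the three compressors guarantees output smaller than its
 * input: zlib's compressBound(), LZO's documented bound of
 * len + len/16 + 64 + 3, and snappy_max_compressed_length() all allow for a
 * small expansion of incompressible data.  write_dump_pages() allocates
 * buf_out once with this size and falls back to storing the page uncompressed
 * whenever the compressed result is not actually smaller.
 */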

/*
 * check if the page is all 0
 */
static inline bool is_zero_page(const uint8_t *buf, size_t page_size)
{
    return buffer_is_zero(buf, page_size);
}

static void write_dump_pages(DumpState *s, Error **errp)
{
    int ret = 0;
    DataCache page_desc, page_data;
    size_t len_buf_out, size_out;
#ifdef CONFIG_LZO
    lzo_bytep wrkmem = NULL;
#endif
    uint8_t *buf_out = NULL;
    off_t offset_desc, offset_data;
    PageDescriptor pd, pd_zero;
    uint8_t *buf;
    GuestPhysBlock *block_iter = NULL;
    uint64_t pfn_iter;

    /* get offset of page_desc and page_data in dump file */
    offset_desc = s->offset_page;
    offset_data = offset_desc + sizeof(PageDescriptor) * s->num_dumpable;

    prepare_data_cache(&page_desc, s, offset_desc);
    prepare_data_cache(&page_data, s, offset_data);

    /* prepare buffer to store compressed data */
    len_buf_out = get_len_buf_out(s->dump_info.page_size, s->flag_compress);
    assert(len_buf_out != 0);

#ifdef CONFIG_LZO
    wrkmem = g_malloc(LZO1X_1_MEM_COMPRESS);
#endif

    buf_out = g_malloc(len_buf_out);

    /*
     * init zero page's page_desc and page_data, because every zero page
     * uses the same page_data
     */
    pd_zero.size = cpu_to_dump32(s, s->dump_info.page_size);
    pd_zero.flags = cpu_to_dump32(s, 0);
    pd_zero.offset = cpu_to_dump64(s, offset_data);
    pd_zero.page_flags = cpu_to_dump64(s, 0);
    buf = g_malloc0(s->dump_info.page_size);
    ret = write_cache(&page_data, buf, s->dump_info.page_size, false);
    g_free(buf);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write page data (zero page)");
        goto out;
    }

    offset_data += s->dump_info.page_size;

    /*
     * dump memory to vmcore page by page. all zero pages share the single
     * zero page written at the start of the page section.
     */
    while (get_next_page(&block_iter, &pfn_iter, &buf, s)) {
        /* check zero page */
        if (is_zero_page(buf, s->dump_info.page_size)) {
            ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor),
                              false);
            if (ret < 0) {
                error_setg(errp, "dump: failed to write page desc");
                goto out;
            }
        } else {
            /*
             * not a zero page, then:
             * 1. compress the page
             * 2. write the compressed page into the cache of page_data
             * 3. get page desc of the compressed page and write it into the
             *    cache of page_desc
             *
             * only one compression format will be used here, because
             * s->flag_compress is set. But when compression fails to work,
             * we fall back to saving in plaintext.
             */
            size_out = len_buf_out;
            if ((s->flag_compress & DUMP_DH_COMPRESSED_ZLIB) &&
                (compress2(buf_out, (uLongf *)&size_out, buf,
                           s->dump_info.page_size, Z_BEST_SPEED) == Z_OK) &&
                (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_ZLIB);
                pd.size = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
#ifdef CONFIG_LZO
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_LZO) &&
                       (lzo1x_1_compress(buf, s->dump_info.page_size, buf_out,
                       (lzo_uint *)&size_out, wrkmem) == LZO_E_OK) &&
                       (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_LZO);
                pd.size = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
#endif
#ifdef CONFIG_SNAPPY
            } else if ((s->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) &&
                       (snappy_compress((char *)buf, s->dump_info.page_size,
                       (char *)buf_out, &size_out) == SNAPPY_OK) &&
                       (size_out < s->dump_info.page_size)) {
                pd.flags = cpu_to_dump32(s, DUMP_DH_COMPRESSED_SNAPPY);
                pd.size = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf_out, size_out, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
#endif
            } else {
                /*
                 * fall back to saving in plaintext, size_out should be
                 * assigned the target's page size
                 */
                pd.flags = cpu_to_dump32(s, 0);
                size_out = s->dump_info.page_size;
                pd.size = cpu_to_dump32(s, size_out);

                ret = write_cache(&page_data, buf,
                                  s->dump_info.page_size, false);
                if (ret < 0) {
                    error_setg(errp, "dump: failed to write page data");
                    goto out;
                }
            }

            /* get and write page desc here */
            pd.page_flags = cpu_to_dump64(s, 0);
            pd.offset = cpu_to_dump64(s, offset_data);
            offset_data += size_out;

            ret = write_cache(&page_desc, &pd, sizeof(PageDescriptor), false);
            if (ret < 0) {
                error_setg(errp, "dump: failed to write page desc");
                goto out;
            }
        }
        s->written_size += s->dump_info.page_size;
    }

    ret = write_cache(&page_desc, NULL, 0, true);
    if (ret < 0) {
        error_setg(errp, "dump: failed to sync cache for page_desc");
        goto out;
    }
    ret = write_cache(&page_data, NULL, 0, true);
    if (ret < 0) {
        error_setg(errp, "dump: failed to sync cache for page_data");
        goto out;
    }

out:
    free_data_cache(&page_desc);
    free_data_cache(&page_data);
#ifdef CONFIG_LZO
    g_free(wrkmem);
#endif
    g_free(buf_out);
}

static void create_kdump_vmcore(DumpState *s, Error **errp)
{
    int ret;
    Error *local_err = NULL;

    /*
     * the kdump-compressed format is:
     *                                               File offset
     *  +------------------------------------------+ 0x0
     *  |    main header (struct disk_dump_header) |
     *  |------------------------------------------+ block 1
     *  |    sub header (struct kdump_sub_header)  |
     *  |------------------------------------------+ block 2
     *  |            1st-dump_bitmap               |
     *  |------------------------------------------+ block 2 + X blocks
     *  |            2nd-dump_bitmap               | (aligned by block)
     *  |------------------------------------------+ block 2 + 2 * X blocks
     *  |  page desc for pfn 0 (struct page_desc)  | (aligned by block)
     *  |  page desc for pfn 1 (struct page_desc)  |
     *  |                    :                     |
     *  |------------------------------------------| (not aligned by block)
     *  |         page data (pfn 0)                |
     *  |         page data (pfn 1)                |
     *  |                    :                     |
     *  +------------------------------------------+
     */

    ret = write_start_flat_header(s->fd);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write start flat header");
        return;
    }

    write_dump_header(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    write_dump_bitmap(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    write_dump_pages(s, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    ret = write_end_flat_header(s->fd);
    if (ret < 0) {
        error_setg(errp, "dump: failed to write end flat header");
        return;
    }
}

static ram_addr_t get_start_block(DumpState *s)
{
    GuestPhysBlock *block;

    if (!s->has_filter) {
        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        return 0;
    }

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (block->target_start >= s->begin + s->length ||
            block->target_end <= s->begin) {
            /* This block is out of the range */
            continue;
        }

        s->next_block = block;
        if (s->begin > block->target_start) {
            s->start = s->begin - block->target_start;
        } else {
            s->start = 0;
        }

        return s->start;
    }

    return -1;
}

static void get_max_mapnr(DumpState *s)
{
    GuestPhysBlock *last_block;

    last_block = QTAILQ_LAST(&s->guest_phys_blocks.head, GuestPhysBlockHead);
    s->max_mapnr = dump_paddr_to_pfn(s, last_block->target_end);
}

static DumpState dump_state_global = { .status = DUMP_STATUS_NONE };

static void dump_state_prepare(DumpState *s)
{
    /* zero the struct, setting status to active */
    *s = (DumpState) { .status = DUMP_STATUS_ACTIVE };
}

bool dump_in_progress(void)
{
    DumpState *state = &dump_state_global;
    return (atomic_read(&state->status) == DUMP_STATUS_ACTIVE);
}

/* calculate total size of memory to be dumped (taking filter into
 * account.) */
static int64_t dump_calculate_size(DumpState *s)
{
    GuestPhysBlock *block;
    int64_t size = 0, total = 0, left = 0, right = 0;

    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (s->has_filter) {
            /* calculate the overlapped region. */
            left = MAX(s->begin, block->target_start);
            right = MIN(s->begin + s->length, block->target_end);
            size = right - left;
            size = size > 0 ? size : 0;
        } else {
            /* count the whole region in */
            size = (block->target_end - block->target_start);
        }
        total += size;
    }

    return total;
}
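
/*
 * Filter overlap example for dump_calculate_size(): with begin = 0x1000 and
 * length = 0x3000 (hypothetical values), a block spanning [0x2000, 0x6000)
 * contributes right - left = MIN(0x4000, 0x6000) - MAX(0x1000, 0x2000)
 * = 0x2000 bytes, while a block entirely outside the window yields a negative
 * difference and is clamped to 0.
 */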

static void dump_init(DumpState *s, int fd, bool has_format,
                      DumpGuestMemoryFormat format, bool paging, bool has_filter,
                      int64_t begin, int64_t length, Error **errp)
{
    CPUState *cpu;
    int nr_cpus;
    Error *err = NULL;
    int ret;

    s->has_format = has_format;
    s->format = format;
    s->written_size = 0;

    /* kdump-compressed format conflicts with paging and filter */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        assert(!paging && !has_filter);
    }

    if (runstate_is_running()) {
        vm_stop(RUN_STATE_SAVE_VM);
        s->resume = true;
    } else {
        s->resume = false;
    }

    /* If we use KVM, we should synchronize the registers before we get dump
     * info or physmap info.
     */
    cpu_synchronize_all_states();
    nr_cpus = 0;
    CPU_FOREACH(cpu) {
        nr_cpus++;
    }

    s->fd = fd;
    s->has_filter = has_filter;
    s->begin = begin;
    s->length = length;

    memory_mapping_list_init(&s->list);

    guest_phys_blocks_init(&s->guest_phys_blocks);
    guest_phys_blocks_append(&s->guest_phys_blocks);
    s->total_size = dump_calculate_size(s);
#ifdef DEBUG_DUMP_GUEST_MEMORY
    fprintf(stderr, "DUMP: total memory to dump: %lu\n", s->total_size);
#endif

    s->start = get_start_block(s);
    if (s->start == -1) {
        error_setg(errp, QERR_INVALID_PARAMETER, "begin");
        goto cleanup;
    }

    /* get dump info: endian, class and architecture.
     * If the target architecture is not supported, cpu_get_dump_info() will
     * return -1.
     */
    ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
    if (ret < 0) {
        error_setg(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    if (!s->dump_info.page_size) {
        s->dump_info.page_size = TARGET_PAGE_SIZE;
    }

    s->note_size = cpu_get_note_size(s->dump_info.d_class,
                                     s->dump_info.d_machine, nr_cpus);
    if (s->note_size < 0) {
        error_setg(errp, QERR_UNSUPPORTED);
        goto cleanup;
    }

    /* get memory mapping */
    if (paging) {
        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            goto cleanup;
        }
    } else {
        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
    }

    s->nr_cpus = nr_cpus;

    get_max_mapnr(s);

    uint64_t tmp;
    tmp = DIV_ROUND_UP(DIV_ROUND_UP(s->max_mapnr, CHAR_BIT),
                       s->dump_info.page_size);
    s->len_dump_bitmap = tmp * s->dump_info.page_size;

    /* init for kdump-compressed format */
    if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        switch (format) {
        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB:
            s->flag_compress = DUMP_DH_COMPRESSED_ZLIB;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO:
#ifdef CONFIG_LZO
            if (lzo_init() != LZO_E_OK) {
                error_setg(errp, "failed to initialize the LZO library");
                goto cleanup;
            }
#endif
            s->flag_compress = DUMP_DH_COMPRESSED_LZO;
            break;

        case DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY:
            s->flag_compress = DUMP_DH_COMPRESSED_SNAPPY;
            break;

        default:
            s->flag_compress = 0;
        }

        return;
    }

    if (s->has_filter) {
        memory_mapping_filter(&s->list, s->begin, s->length);
    }

    /*
     * calculate phdr_num
     *
     * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
     */
    s->phdr_num = 1; /* PT_NOTE */
    if (s->list.num < UINT16_MAX - 2) {
        s->phdr_num += s->list.num;
        s->have_section = false;
    } else {
        s->have_section = true;
        s->phdr_num = PN_XNUM;
        s->sh_info = 1; /* PT_NOTE */

        /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */
        if (s->list.num <= UINT32_MAX - 1) {
            s->sh_info += s->list.num;
        } else {
            s->sh_info = UINT32_MAX;
        }
    }

    if (s->dump_info.d_class == ELFCLASS64) {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->sh_info +
                               sizeof(Elf64_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf64_Ehdr) +
                               sizeof(Elf64_Phdr) * s->phdr_num + s->note_size;
        }
    } else {
        if (s->have_section) {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->sh_info +
                               sizeof(Elf32_Shdr) + s->note_size;
        } else {
            s->memory_offset = sizeof(Elf32_Ehdr) +
                               sizeof(Elf32_Phdr) * s->phdr_num + s->note_size;
        }
    }

    return;

cleanup:
    dump_cleanup(s);
}

/* this operation might be time consuming. */
static void dump_process(DumpState *s, Error **errp)
{
    Error *local_err = NULL;
    DumpQueryResult *result = NULL;

    if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) {
        create_kdump_vmcore(s, &local_err);
    } else {
        create_vmcore(s, &local_err);
    }

    /* make sure status is written after written_size updates */
    smp_wmb();
    atomic_set(&s->status,
               (local_err ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED));

    /* send DUMP_COMPLETED message (unconditionally) */
    result = qmp_query_dump(NULL);
    /* should never fail */
    assert(result);
    qapi_event_send_dump_completed(result, !!local_err, (local_err ?
                                   error_get_pretty(local_err) : NULL),
                                   &error_abort);
    qapi_free_DumpQueryResult(result);

    error_propagate(errp, local_err);
    dump_cleanup(s);
}

static void *dump_thread(void *data)
{
    Error *err = NULL;
    DumpState *s = (DumpState *)data;
    dump_process(s, &err);
    error_free(err);
    return NULL;
}

DumpQueryResult *qmp_query_dump(Error **errp)
{
    DumpQueryResult *result = g_new(DumpQueryResult, 1);
    DumpState *state = &dump_state_global;
    result->status = atomic_read(&state->status);
    /* make sure we are reading status and written_size in order */
    smp_rmb();
    result->completed = state->written_size;
    result->total = state->total_size;
    return result;
}

void qmp_dump_guest_memory(bool paging, const char *file,
                           bool has_detach, bool detach,
                           bool has_begin, int64_t begin, bool has_length,
                           int64_t length, bool has_format,
                           DumpGuestMemoryFormat format, Error **errp)
{
    const char *p;
    int fd = -1;
    DumpState *s;
    Error *local_err = NULL;
    bool detach_p = false;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Dump not allowed during incoming migration.");
        return;
    }

    /* if there is a dump in the background, we should wait until the dump
     * has finished */
    if (dump_in_progress()) {
        error_setg(errp, "There is a dump in process, please wait.");
        return;
    }

    /*
     * kdump-compressed format needs the whole memory dumped, so paging or
     * filter is not supported here.
     */
    if ((has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) &&
        (paging || has_begin || has_length)) {
        error_setg(errp, "kdump-compressed format doesn't support paging or "
                         "filter");
        return;
    }
    if (has_begin && !has_length) {
        error_setg(errp, QERR_MISSING_PARAMETER, "length");
        return;
    }
    if (!has_begin && has_length) {
        error_setg(errp, QERR_MISSING_PARAMETER, "begin");
        return;
    }
    if (has_detach) {
        detach_p = detach;
    }

    /* check whether lzo/snappy is supported */
#ifndef CONFIG_LZO
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO) {
        error_setg(errp, "kdump-lzo is not available now");
        return;
    }
#endif

#ifndef CONFIG_SNAPPY
    if (has_format && format == DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY) {
        error_setg(errp, "kdump-snappy is not available now");
        return;
    }
#endif

#if !defined(WIN32)
    if (strstart(file, "fd:", &p)) {
        fd = monitor_get_fd(cur_mon, p, errp);
        if (fd == -1) {
            return;
        }
    }
#endif

    if (strstart(file, "file:", &p)) {
        fd = qemu_open(p, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR);
        if (fd < 0) {
            error_setg_file_open(errp, errno, p);
            return;
        }
    }

    if (fd == -1) {
        error_setg(errp, QERR_INVALID_PARAMETER, "protocol");
        return;
    }

    s = &dump_state_global;
    dump_state_prepare(s);

    dump_init(s, fd, has_format, format, paging, has_begin,
              begin, length, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        atomic_set(&s->status, DUMP_STATUS_FAILED);
        return;
    }

    if (detach_p) {
        /* detached dump */
        qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
                           s, QEMU_THREAD_DETACHED);
    } else {
        /* sync dump */
        dump_process(s, errp);
    }
}
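
/*
 * Example QMP invocation handled by qmp_dump_guest_memory() above (the "file:"
 * and "fd:" prefixes of the protocol string are parsed right here; the path
 * is illustrative):
 *
 *   { "execute": "dump-guest-memory",
 *     "arguments": { "paging": false,
 *                    "protocol": "file:/tmp/vmcore",
 *                    "detach": true,
 *                    "format": "kdump-zlib" } }
 *
 * With "detach": true the command returns immediately and the dump runs in
 * the dump_thread created above; progress can then be polled with
 * { "execute": "query-dump" }.
 */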

DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp)
{
    DumpGuestMemoryFormatList *item;
    DumpGuestMemoryCapability *cap =
                                  g_malloc0(sizeof(DumpGuestMemoryCapability));

    /* elf is always available */
    item = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    cap->formats = item;
    item->value = DUMP_GUEST_MEMORY_FORMAT_ELF;

    /* kdump-zlib is always available */
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_ZLIB;

    /* add new item if kdump-lzo is available */
#ifdef CONFIG_LZO
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_LZO;
#endif

    /* add new item if kdump-snappy is available */
#ifdef CONFIG_SNAPPY
    item->next = g_malloc0(sizeof(DumpGuestMemoryFormatList));
    item = item->next;
    item->value = DUMP_GUEST_MEMORY_FORMAT_KDUMP_SNAPPY;
#endif

    return cap;
}