arch_init.c

/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <stdint.h>
#include <stdarg.h>
#include <stdlib.h>
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif
#include "config.h"
#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
#include "qemu/bitmap.h"
#include "sysemu/arch_init.h"
#include "audio/audio.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/audio/audio.h"
#include "sysemu/kvm.h"
#include "migration/migration.h"
#include "hw/i386/smbios.h"
#include "exec/address-spaces.h"
#include "hw/audio/pcspk.h"
#include "migration/page_cache.h"
#include "qemu/config-file.h"
#include "qmp-commands.h"
#include "trace.h"
#include "exec/cpu-all.h"
#include "hw/acpi/acpi.h"

#ifdef DEBUG_ARCH_INIT
#define DPRINTF(fmt, ...) \
    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#ifdef TARGET_SPARC
int graphic_width = 1024;
int graphic_height = 768;
int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
int graphic_depth = 32;
#endif

#if defined(TARGET_ALPHA)
#define QEMU_ARCH QEMU_ARCH_ALPHA
#elif defined(TARGET_ARM)
#define QEMU_ARCH QEMU_ARCH_ARM
#elif defined(TARGET_CRIS)
#define QEMU_ARCH QEMU_ARCH_CRIS
#elif defined(TARGET_I386)
#define QEMU_ARCH QEMU_ARCH_I386
#elif defined(TARGET_M68K)
#define QEMU_ARCH QEMU_ARCH_M68K
#elif defined(TARGET_LM32)
#define QEMU_ARCH QEMU_ARCH_LM32
#elif defined(TARGET_MICROBLAZE)
#define QEMU_ARCH QEMU_ARCH_MICROBLAZE
#elif defined(TARGET_MIPS)
#define QEMU_ARCH QEMU_ARCH_MIPS
#elif defined(TARGET_MOXIE)
#define QEMU_ARCH QEMU_ARCH_MOXIE
#elif defined(TARGET_OPENRISC)
#define QEMU_ARCH QEMU_ARCH_OPENRISC
#elif defined(TARGET_PPC)
#define QEMU_ARCH QEMU_ARCH_PPC
#elif defined(TARGET_S390X)
#define QEMU_ARCH QEMU_ARCH_S390X
#elif defined(TARGET_SH4)
#define QEMU_ARCH QEMU_ARCH_SH4
#elif defined(TARGET_SPARC)
#define QEMU_ARCH QEMU_ARCH_SPARC
#elif defined(TARGET_XTENSA)
#define QEMU_ARCH QEMU_ARCH_XTENSA
#elif defined(TARGET_UNICORE32)
#define QEMU_ARCH QEMU_ARCH_UNICORE32
#endif

const uint32_t arch_type = QEMU_ARCH;
static bool mig_throttle_on;
static int dirty_rate_high_cnt;
static void check_guest_throttling(void);

/***********************************************************/
/* ram save/restore */

#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
#define RAM_SAVE_FLAG_COMPRESS 0x02
#define RAM_SAVE_FLAG_MEM_SIZE 0x04
#define RAM_SAVE_FLAG_PAGE     0x08
#define RAM_SAVE_FLAG_EOS      0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE   0x40
/* 0x80 is reserved in migration.h; start with 0x100 next */
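
/*
 * RAM pages are TARGET_PAGE_SIZE-aligned, so the low bits of the 64-bit
 * address word written to the stream are free to carry the flags above;
 * the loader recovers them with "flags = addr & ~TARGET_PAGE_MASK".
 */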

static struct defconfig_file {
    const char *filename;
    /* Indicates it is a user config file (disabled by -no-user-config) */
    bool userconfig;
} default_config_files[] = {
    { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
    { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
    { NULL }, /* end of list */
};

int qemu_read_default_config_files(bool userconfig)
{
    int ret;
    struct defconfig_file *f;

    for (f = default_config_files; f->filename; f++) {
        if (!userconfig && f->userconfig) {
            continue;
        }
        ret = qemu_read_config_file(f->filename);
        if (ret < 0 && ret != -ENOENT) {
            return ret;
        }
    }

    return 0;
}

static inline bool is_zero_page(uint8_t *p)
{
    return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
        TARGET_PAGE_SIZE;
}

/* struct contains XBZRLE cache and a static page
   used by the compression */
static struct {
    /* buffer used for XBZRLE encoding */
    uint8_t *encoded_buf;
    /* buffer for storing page content */
    uint8_t *current_buf;
    /* buffer used for XBZRLE decoding */
    uint8_t *decoded_buf;
    /* Cache for XBZRLE */
    PageCache *cache;
} XBZRLE = {
    .encoded_buf = NULL,
    .current_buf = NULL,
    .decoded_buf = NULL,
    .cache = NULL,
};

int64_t xbzrle_cache_resize(int64_t new_size)
{
    if (XBZRLE.cache != NULL) {
        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
            TARGET_PAGE_SIZE;
    }
    return pow2floor(new_size);
}

/* accounting for migration statistics */
typedef struct AccountingInfo {
    uint64_t dup_pages;
    uint64_t skipped_pages;
    uint64_t norm_pages;
    uint64_t iterations;
    uint64_t xbzrle_bytes;
    uint64_t xbzrle_pages;
    uint64_t xbzrle_cache_miss;
    uint64_t xbzrle_overflows;
} AccountingInfo;

static AccountingInfo acct_info;

static void acct_clear(void)
{
    memset(&acct_info, 0, sizeof(acct_info));
}

uint64_t dup_mig_bytes_transferred(void)
{
    return acct_info.dup_pages * TARGET_PAGE_SIZE;
}

uint64_t dup_mig_pages_transferred(void)
{
    return acct_info.dup_pages;
}

uint64_t skipped_mig_bytes_transferred(void)
{
    return acct_info.skipped_pages * TARGET_PAGE_SIZE;
}

uint64_t skipped_mig_pages_transferred(void)
{
    return acct_info.skipped_pages;
}

uint64_t norm_mig_bytes_transferred(void)
{
    return acct_info.norm_pages * TARGET_PAGE_SIZE;
}

uint64_t norm_mig_pages_transferred(void)
{
    return acct_info.norm_pages;
}

uint64_t xbzrle_mig_bytes_transferred(void)
{
    return acct_info.xbzrle_bytes;
}

uint64_t xbzrle_mig_pages_transferred(void)
{
    return acct_info.xbzrle_pages;
}

uint64_t xbzrle_mig_pages_cache_miss(void)
{
    return acct_info.xbzrle_cache_miss;
}

uint64_t xbzrle_mig_pages_overflow(void)
{
    return acct_info.xbzrle_overflows;
}
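
/*
 * save_block_hdr: write the header that precedes every page on the wire:
 * an 8-byte big-endian word holding the page offset OR-ed with the
 * RAM_SAVE_FLAG_* bits, followed (unless RAM_SAVE_FLAG_CONTINUE marks the
 * page as belonging to the same block as the previous one) by a one-byte
 * length and the block's idstr.  Returns the number of header bytes
 * written.
 */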
static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
                             int cont, int flag)
{
    size_t size;

    qemu_put_be64(f, offset | cont | flag);
    size = 8;

    if (!cont) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr,
                        strlen(block->idstr));
        size += 1 + strlen(block->idstr);
    }
    return size;
}

#define ENCODING_FLAG_XBZRLE 0x1
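
/*
 * save_xbzrle_page: delta-compress a page against the copy held in the
 * XBZRLE page cache and send the result.  Returns the number of bytes
 * written, 0 if the page was unmodified, or -1 if it could not be sent
 * this way (cache miss, or the encoding overflowed a page) and the
 * caller should fall back to sending a normal page.
 */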
static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
                            ram_addr_t current_addr, RAMBlock *block,
                            ram_addr_t offset, int cont, bool last_stage)
{
    int encoded_len = 0, bytes_sent = -1;
    uint8_t *prev_cached_page;

    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
        if (!last_stage) {
            cache_insert(XBZRLE.cache, current_addr, current_data);
        }
        acct_info.xbzrle_cache_miss++;
        return -1;
    }

    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);

    /* save current buffer into memory */
    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);

    /* XBZRLE encoding (if there is no overflow) */
    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                       TARGET_PAGE_SIZE);
    if (encoded_len == 0) {
        DPRINTF("Skipping unmodified page\n");
        return 0;
    } else if (encoded_len == -1) {
        DPRINTF("Overflow\n");
        acct_info.xbzrle_overflows++;
        /* update data in the cache */
        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
        return -1;
    }

    /* update the cache so the next delta is computed against the data
       the destination now holds */
    if (!last_stage) {
        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
    }

    /* Send XBZRLE based compressed page */
    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
    qemu_put_be16(f, encoded_len);
    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
    bytes_sent += encoded_len + 1 + 2;
    acct_info.xbzrle_pages++;
    acct_info.xbzrle_bytes += bytes_sent;

    return bytes_sent;
}

/* This is the last block that we have visited searching for dirty pages */
static RAMBlock *last_seen_block;
/* This is the last block from where we have sent data */
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
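
/*
 * During the bulk stage (the first complete pass over RAM) every page is
 * still dirty, so the next dirty page after "start" is simply the
 * following one; the bitmap search below is only needed once the bulk
 * stage is over.
 */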
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                 ram_addr_t start)
{
    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
    unsigned long next;

    if (ram_bulk_stage && nr > base) {
        next = nr + 1;
    } else {
        next = find_next_bit(migration_bitmap, size, nr);
    }

    if (next < size) {
        clear_bit(next, migration_bitmap);
        migration_dirty_pages--;
    }
    return (next - base) << TARGET_PAGE_BITS;
}

static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
                                              ram_addr_t offset)
{
    bool ret;
    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;

    ret = test_and_set_bit(nr, migration_bitmap);

    if (!ret) {
        migration_dirty_pages++;
    }
    return ret;
}

/* Needs iothread lock! */

static void migration_bitmap_sync(void)
{
    RAMBlock *block;
    ram_addr_t addr;
    uint64_t num_dirty_pages_init = migration_dirty_pages;
    MigrationState *s = migrate_get_current();
    static int64_t start_time;
    static int64_t bytes_xfer_prev;
    static int64_t num_dirty_pages_period;
    int64_t end_time;
    int64_t bytes_xfer_now;

    if (!bytes_xfer_prev) {
        bytes_xfer_prev = ram_bytes_transferred();
    }

    if (!start_time) {
        start_time = qemu_get_clock_ms(rt_clock);
    }

    trace_migration_bitmap_sync_start();
    address_space_sync_dirty_bitmap(&address_space_memory);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
            if (memory_region_test_and_clear_dirty(block->mr,
                                                   addr, TARGET_PAGE_SIZE,
                                                   DIRTY_MEMORY_MIGRATION)) {
                migration_bitmap_set_dirty(block->mr, addr);
            }
        }
    }
    trace_migration_bitmap_sync_end(migration_dirty_pages
                                    - num_dirty_pages_init);
    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
    end_time = qemu_get_clock_ms(rt_clock);

    /* more than 1 second = 1000 milliseconds */
    if (end_time > start_time + 1000) {
        if (migrate_auto_converge()) {
            /* The following detection logic can be refined later. For now:
               Check to see if the dirtied bytes is 50% more than the approx.
               amount of bytes that just got transferred since the last time
               we were in this routine. If that happens >N times (for now N==4)
               we turn on the throttle down logic */
            bytes_xfer_now = ram_bytes_transferred();
            if (s->dirty_pages_rate &&
                (num_dirty_pages_period * TARGET_PAGE_SIZE >
                 (bytes_xfer_now - bytes_xfer_prev) / 2) &&
                (dirty_rate_high_cnt++ > 4)) {
                trace_migration_throttle();
                mig_throttle_on = true;
                dirty_rate_high_cnt = 0;
            }
            bytes_xfer_prev = bytes_xfer_now;
        } else {
            mig_throttle_on = false;
        }
        s->dirty_pages_rate = num_dirty_pages_period * 1000
            / (end_time - start_time);
        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
        start_time = end_time;
        num_dirty_pages_period = 0;
    }
}

/*
 * ram_save_block: Writes a page of memory to the stream f
 *
 * Returns:  The number of bytes written.
 *           0 means no dirty pages
 */
static int ram_save_block(QEMUFile *f, bool last_stage)
{
    RAMBlock *block = last_seen_block;
    ram_addr_t offset = last_offset;
    bool complete_round = false;
    int bytes_sent = 0;
    MemoryRegion *mr;
    ram_addr_t current_addr;

    if (!block) {
        block = QTAILQ_FIRST(&ram_list.blocks);
    }

    while (true) {
        mr = block->mr;
        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
        if (complete_round && block == last_seen_block &&
            offset >= last_offset) {
            break;
        }
        if (offset >= block->length) {
            offset = 0;
            block = QTAILQ_NEXT(block, next);
            if (!block) {
                block = QTAILQ_FIRST(&ram_list.blocks);
                complete_round = true;
                ram_bulk_stage = false;
            }
        } else {
            int ret;
            uint8_t *p;
            int cont = (block == last_sent_block) ?
                RAM_SAVE_FLAG_CONTINUE : 0;

            p = memory_region_get_ram_ptr(mr) + offset;

            /* When in doubt, send the page as a normal page */
            bytes_sent = -1;
            ret = ram_control_save_page(f, block->offset,
                                        offset, TARGET_PAGE_SIZE, &bytes_sent);

            if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                if (ret != RAM_SAVE_CONTROL_DELAYED) {
                    if (bytes_sent > 0) {
                        acct_info.norm_pages++;
                    } else if (bytes_sent == 0) {
                        acct_info.dup_pages++;
                    }
                }
            } else if (is_zero_page(p)) {
                acct_info.dup_pages++;
                bytes_sent = save_block_hdr(f, block, offset, cont,
                                            RAM_SAVE_FLAG_COMPRESS);
                qemu_put_byte(f, 0);
                bytes_sent++;
            } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
                current_addr = block->offset + offset;
                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                              offset, cont, last_stage);
                if (!last_stage) {
                    p = get_cached_data(XBZRLE.cache, current_addr);
                }
            }

            /* XBZRLE overflow or normal page */
            if (bytes_sent == -1) {
                bytes_sent = save_block_hdr(f, block, offset, cont,
                                            RAM_SAVE_FLAG_PAGE);
                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
                bytes_sent += TARGET_PAGE_SIZE;
                acct_info.norm_pages++;
            }

            /* if page is unmodified, continue to the next */
            if (bytes_sent > 0) {
                last_sent_block = block;
                break;
            }
        }
    }
    last_seen_block = block;
    last_offset = offset;

    return bytes_sent;
}

static uint64_t bytes_transferred;

void acct_update_position(QEMUFile *f, size_t size, bool zero)
{
    uint64_t pages = size / TARGET_PAGE_SIZE;
    if (zero) {
        acct_info.dup_pages += pages;
    } else {
        acct_info.norm_pages += pages;
        bytes_transferred += size;
        qemu_update_position(f, size);
    }
}

static ram_addr_t ram_save_remaining(void)
{
    return migration_dirty_pages;
}

uint64_t ram_bytes_remaining(void)
{
    return ram_save_remaining() * TARGET_PAGE_SIZE;
}

uint64_t ram_bytes_transferred(void)
{
    return bytes_transferred;
}

uint64_t ram_bytes_total(void)
{
    RAMBlock *block;
    uint64_t total = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        total += block->length;
    }
    return total;
}

static void migration_end(void)
{
    if (migration_bitmap) {
        memory_global_dirty_log_stop();
        g_free(migration_bitmap);
        migration_bitmap = NULL;
    }

    if (XBZRLE.cache) {
        cache_fini(XBZRLE.cache);
        g_free(XBZRLE.cache);
        g_free(XBZRLE.encoded_buf);
        g_free(XBZRLE.current_buf);
        g_free(XBZRLE.decoded_buf);
        XBZRLE.cache = NULL;
    }
}

static void ram_migration_cancel(void *opaque)
{
    migration_end();
}

static void reset_ram_globals(void)
{
    last_seen_block = NULL;
    last_sent_block = NULL;
    last_offset = 0;
    last_version = ram_list.version;
    ram_bulk_stage = true;
}

#define MAX_WAIT 50 /* ms, half buffered_file limit */
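
/*
 * ram_save_setup: first stage of RAM migration.  Allocates the dirty
 * bitmap with every page marked dirty, optionally sets up the XBZRLE
 * cache, starts dirty logging, and writes the total RAM size plus a
 * catalogue of every RAMBlock (idstr and length) so the destination
 * can check that both sides agree on the memory layout.
 */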
static int ram_save_setup(QEMUFile *f, void *opaque)
{
    RAMBlock *block;
    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;

    migration_bitmap = bitmap_new(ram_pages);
    bitmap_set(migration_bitmap, 0, ram_pages);
    migration_dirty_pages = ram_pages;
    mig_throttle_on = false;
    dirty_rate_high_cnt = 0;

    if (migrate_use_xbzrle()) {
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
            DPRINTF("Error creating cache\n");
            return -1;
        }
        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
        acct_clear();
    }

    qemu_mutex_lock_iothread();
    qemu_mutex_lock_ramlist();
    bytes_transferred = 0;
    reset_ram_globals();

    memory_global_dirty_log_start();
    migration_bitmap_sync();
    qemu_mutex_unlock_iothread();

    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        qemu_put_byte(f, strlen(block->idstr));
        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
        qemu_put_be64(f, block->length);
    }

    qemu_mutex_unlock_ramlist();

    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
    ram_control_after_iterate(f, RAM_CONTROL_SETUP);

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}

static int ram_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int i;
    int64_t t0;
    int total_sent = 0;

    qemu_mutex_lock_ramlist();

    if (ram_list.version != last_version) {
        reset_ram_globals();
    }

    ram_control_before_iterate(f, RAM_CONTROL_ROUND);

    t0 = qemu_get_clock_ns(rt_clock);
    i = 0;
    while ((ret = qemu_file_rate_limit(f)) == 0) {
        int bytes_sent;

        bytes_sent = ram_save_block(f, false);
        /* no more blocks to send */
        if (bytes_sent == 0) {
            break;
        }
        total_sent += bytes_sent;
        acct_info.iterations++;
        check_guest_throttling();
        /* we want to check on the first iteration, in case this was the
           first round and we had to sync the dirty bitmap.
           qemu_get_clock_ns() is a bit expensive, so we only check once
           every 64 iterations. */
        if ((i & 63) == 0) {
            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
            if (t1 > MAX_WAIT) {
                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                        t1, i);
                break;
            }
        }
        i++;
    }

    qemu_mutex_unlock_ramlist();

    /*
     * Must occur before EOS (or any QEMUFile operation)
     * because of RDMA protocol.
     */
    ram_control_after_iterate(f, RAM_CONTROL_ROUND);

    if (ret < 0) {
        bytes_transferred += total_sent;
        return ret;
    }

    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    total_sent += 8;
    bytes_transferred += total_sent;

    return total_sent;
}

static int ram_save_complete(QEMUFile *f, void *opaque)
{
    qemu_mutex_lock_ramlist();
    migration_bitmap_sync();

    ram_control_before_iterate(f, RAM_CONTROL_FINISH);

    /* flush all remaining blocks regardless of rate limiting */
    while (true) {
        int bytes_sent;

        bytes_sent = ram_save_block(f, true);
        /* no more blocks to send */
        if (bytes_sent == 0) {
            break;
        }
        bytes_transferred += bytes_sent;
    }

    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
    migration_end();

    qemu_mutex_unlock_ramlist();
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

    return 0;
}
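
/*
 * ram_save_pending: report an estimate of the number of dirty bytes
 * still to be sent.  When the estimate drops below max_size, the dirty
 * bitmap is resynced first (under the iothread lock) so that the
 * decision to enter the final stage is made on fresh data.
 */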
static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    uint64_t remaining_size;

    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;

    if (remaining_size < max_size) {
        qemu_mutex_lock_iothread();
        migration_bitmap_sync();
        qemu_mutex_unlock_iothread();
        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
    }
    return remaining_size;
}

static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
{
    int ret, rc = 0;
    unsigned int xh_len;
    int xh_flags;

    if (!XBZRLE.decoded_buf) {
        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
    }

    /* extract RLE header */
    xh_flags = qemu_get_byte(f);
    xh_len = qemu_get_be16(f);

    if (xh_flags != ENCODING_FLAG_XBZRLE) {
        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
        return -1;
    }

    if (xh_len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
        return -1;
    }
    /* load data and decode */
    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);

    /* decode RLE */
    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
                               TARGET_PAGE_SIZE);
    if (ret == -1) {
        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
        rc = -1;
    } else if (ret > TARGET_PAGE_SIZE) {
        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
                ret, TARGET_PAGE_SIZE);
        abort();
    }

    return rc;
}
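
/*
 * host_from_stream_offset: translate the (block idstr, offset) pair read
 * from the stream into a host pointer.  The block found is remembered in
 * a static variable so that pages flagged RAM_SAVE_FLAG_CONTINUE can
 * reuse it without the idstr being resent.
 */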
static inline void *host_from_stream_offset(QEMUFile *f,
                                            ram_addr_t offset,
                                            int flags)
{
    static RAMBlock *block = NULL;
    char id[256];
    uint8_t len;

    if (flags & RAM_SAVE_FLAG_CONTINUE) {
        if (!block) {
            fprintf(stderr, "Ack, bad migration stream!\n");
            return NULL;
        }

        return memory_region_get_ram_ptr(block->mr) + offset;
    }

    len = qemu_get_byte(f);
    qemu_get_buffer(f, (uint8_t *)id, len);
    id[len] = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (!strncmp(id, block->idstr, sizeof(id))) {
            return memory_region_get_ram_ptr(block->mr) + offset;
        }
    }

    fprintf(stderr, "Can't find block %s!\n", id);
    return NULL;
}

/*
 * If a page (or a whole RDMA chunk) has been
 * determined to be zero, then zap it.
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch != 0 || !is_zero_page(host)) {
        memset(host, ch, size);
#ifndef _WIN32
        if (ch == 0 &&
            (!kvm_enabled() || kvm_has_sync_mmu()) &&
            getpagesize() <= TARGET_PAGE_SIZE) {
            qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
        }
#endif
    }
}

static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
    ram_addr_t addr;
    int flags, ret = 0;
    int error;
    static uint64_t seq_iter;

    seq_iter++;

    if (version_id != 4) {
        return -EINVAL;
    }

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~TARGET_PAGE_MASK;
        addr &= TARGET_PAGE_MASK;

        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
            /* Synchronize RAM block list */
            char id[256];
            ram_addr_t length;
            ram_addr_t total_ram_bytes = addr;

            while (total_ram_bytes) {
                RAMBlock *block;
                uint8_t len;

                len = qemu_get_byte(f);
                qemu_get_buffer(f, (uint8_t *)id, len);
                id[len] = 0;
                length = qemu_get_be64(f);

                QTAILQ_FOREACH(block, &ram_list.blocks, next) {
                    if (!strncmp(id, block->idstr, sizeof(id))) {
                        if (block->length != length) {
                            fprintf(stderr,
                                    "Length mismatch: %s: " RAM_ADDR_FMT
                                    " in != " RAM_ADDR_FMT "\n", id, length,
                                    block->length);
                            ret = -EINVAL;
                            goto done;
                        }
                        break;
                    }
                }

                if (!block) {
                    fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                            "accept migration\n", id);
                    ret = -EINVAL;
                    goto done;
                }

                total_ram_bytes -= length;
            }
        }

        if (flags & RAM_SAVE_FLAG_COMPRESS) {
            void *host;
            uint8_t ch;

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            ch = qemu_get_byte(f);
            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
        } else if (flags & RAM_SAVE_FLAG_PAGE) {
            void *host;

            host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
            void *host = host_from_stream_offset(f, addr, flags);
            if (!host) {
                return -EINVAL;
            }

            if (load_xbzrle(f, addr, host) < 0) {
                ret = -EINVAL;
                goto done;
            }
        } else if (flags & RAM_SAVE_FLAG_HOOK) {
            ram_control_load_hook(f, flags);
        }
        error = qemu_file_get_error(f);
        if (error) {
            ret = error;
            goto done;
        }
    } while (!(flags & RAM_SAVE_FLAG_EOS));

done:
    DPRINTF("Completed load of VM with exit code %d seq iteration "
            "%" PRIu64 "\n", ret, seq_iter);
    return ret;
}
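
/*
 * Hooks tying RAM migration into the generic savevm machinery: setup
 * runs once and writes the RAM block catalogue, iterate is called
 * repeatedly while bandwidth is available, pending reports how much
 * dirty memory remains, and complete performs the final pass with the
 * guest stopped.
 */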
SaveVMHandlers savevm_ram_handlers = {
    .save_live_setup = ram_save_setup,
    .save_live_iterate = ram_save_iterate,
    .save_live_complete = ram_save_complete,
    .save_live_pending = ram_save_pending,
    .load_state = ram_load,
    .cancel = ram_migration_cancel,
};

struct soundhw {
    const char *name;
    const char *descr;
    int enabled;
    int isa;
    union {
        int (*init_isa) (ISABus *bus);
        int (*init_pci) (PCIBus *bus);
    } init;
};

static struct soundhw soundhw[9];
static int soundhw_count;

void isa_register_soundhw(const char *name, const char *descr,
                          int (*init_isa)(ISABus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 1;
    soundhw[soundhw_count].init.init_isa = init_isa;
    soundhw_count++;
}

void pci_register_soundhw(const char *name, const char *descr,
                          int (*init_pci)(PCIBus *bus))
{
    assert(soundhw_count < ARRAY_SIZE(soundhw) - 1);
    soundhw[soundhw_count].name = name;
    soundhw[soundhw_count].descr = descr;
    soundhw[soundhw_count].isa = 0;
    soundhw[soundhw_count].init.init_pci = init_pci;
    soundhw_count++;
}
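
/*
 * select_soundhw: parse the -soundhw option.  Accepts a comma-separated
 * list of card names, the special value "all" to enable every registered
 * card, or a help option to print the valid names and exit.
 */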
void select_soundhw(const char *optarg)
{
    struct soundhw *c;

    if (is_help_option(optarg)) {
    show_valid_cards:

        if (soundhw_count) {
            printf("Valid sound card names (comma separated):\n");
            for (c = soundhw; c->name; ++c) {
                printf("%-11s %s\n", c->name, c->descr);
            }
            printf("\n-soundhw all will enable all of the above\n");
        } else {
            printf("Machine has no user-selectable audio hardware "
                   "(it may or may not have always-present audio hardware).\n");
        }
        exit(!is_help_option(optarg));
    } else {
        size_t l;
        const char *p;
        char *e;
        int bad_card = 0;

        if (!strcmp(optarg, "all")) {
            for (c = soundhw; c->name; ++c) {
                c->enabled = 1;
            }
            return;
        }

        p = optarg;
        while (*p) {
            e = strchr(p, ',');
            l = !e ? strlen(p) : (size_t) (e - p);

            for (c = soundhw; c->name; ++c) {
                if (!strncmp(c->name, p, l) && !c->name[l]) {
                    c->enabled = 1;
                    break;
                }
            }

            if (!c->name) {
                if (l > 80) {
                    fprintf(stderr,
                            "Unknown sound card name (too big to show)\n");
                } else {
                    fprintf(stderr, "Unknown sound card name `%.*s'\n",
                            (int) l, p);
                }
                bad_card = 1;
            }
            p += l + (e != NULL);
        }

        if (bad_card) {
            goto show_valid_cards;
        }
    }
}

void audio_init(void)
{
    struct soundhw *c;
    ISABus *isa_bus = (ISABus *) object_resolve_path_type("", TYPE_ISA_BUS, NULL);
    PCIBus *pci_bus = (PCIBus *) object_resolve_path_type("", TYPE_PCI_BUS, NULL);

    for (c = soundhw; c->name; ++c) {
        if (c->enabled) {
            if (c->isa) {
                if (!isa_bus) {
                    fprintf(stderr, "ISA bus not available for %s\n", c->name);
                    exit(1);
                }
                c->init.init_isa(isa_bus);
            } else {
                if (!pci_bus) {
                    fprintf(stderr, "PCI bus not available for %s\n", c->name);
                    exit(1);
                }
                c->init.init_pci(pci_bus);
            }
        }
    }
}
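
/*
 * qemu_uuid_parse: parse a UUID in the canonical 36-character form
 * (for illustration, a string shaped like
 * "550e8400-e29b-41d4-a716-446655440000") into 16 raw bytes.  Returns 0
 * on success, -1 on a malformed string.  On x86 the parsed UUID is also
 * exposed to the guest through the SMBIOS type 1 table.
 */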
int qemu_uuid_parse(const char *str, uint8_t *uuid)
{
    int ret;

    if (strlen(str) != 36) {
        return -1;
    }

    ret = sscanf(str, UUID_FMT, &uuid[0], &uuid[1], &uuid[2], &uuid[3],
                 &uuid[4], &uuid[5], &uuid[6], &uuid[7], &uuid[8], &uuid[9],
                 &uuid[10], &uuid[11], &uuid[12], &uuid[13], &uuid[14],
                 &uuid[15]);

    if (ret != 16) {
        return -1;
    }
#ifdef TARGET_I386
    smbios_add_field(1, offsetof(struct smbios_type_1, uuid), uuid, 16);
#endif
    return 0;
}

void do_acpitable_option(const QemuOpts *opts)
{
#ifdef TARGET_I386
    Error *err = NULL;

    acpi_table_add(opts, &err);
    if (err) {
        fprintf(stderr, "Wrong acpi table provided: %s\n",
                error_get_pretty(err));
        error_free(err);
        exit(1);
    }
#endif
}

void do_smbios_option(const char *optarg)
{
#ifdef TARGET_I386
    if (smbios_entry_add(optarg) < 0) {
        exit(1);
    }
#endif
}

void cpudef_init(void)
{
#if defined(cpudef_setup)
    cpudef_setup(); /* parse cpu definitions in target config file */
#endif
}

int tcg_available(void)
{
    return 1;
}

int kvm_available(void)
{
#ifdef CONFIG_KVM
    return 1;
#else
    return 0;
#endif
}

int xen_available(void)
{
#ifdef CONFIG_XEN
    return 1;
#else
    return 0;
#endif
}

TargetInfo *qmp_query_target(Error **errp)
{
    TargetInfo *info = g_malloc0(sizeof(*info));

    info->arch = g_strdup(TARGET_NAME);

    return info;
}

/* Stub function that gets run on the vcpu when it is brought out of the
   VM to run inside qemu via async_run_on_cpu() */
static void mig_sleep_cpu(void *opq)
{
    qemu_mutex_unlock_iothread();
    g_usleep(30*1000);
    qemu_mutex_lock_iothread();
}

/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
   much time in the VM.  The migration thread will try to catch up.
   The workload will experience a performance drop.
*/
static void mig_throttle_cpu_down(CPUState *cpu, void *data)
{
    async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
}

static void mig_throttle_guest_down(void)
{
    qemu_mutex_lock_iothread();
    qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
    qemu_mutex_unlock_iothread();
}

static void check_guest_throttling(void)
{
    static int64_t t0;
    int64_t t1;

    if (!mig_throttle_on) {
        return;
    }

    if (!t0) {
        t0 = qemu_get_clock_ns(rt_clock);
        return;
    }

    t1 = qemu_get_clock_ns(rt_clock);

    /* If it has been more than 40 ms since the last time the guest
     * was throttled then do it again.
     */
    if (40 < (t1 - t0) / 1000000) {
        mig_throttle_guest_down();
        t0 = t1;
    }
}