/*
 * Dirtyrate implementation code
 *
 * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
 *
 * Authors:
 *  Chuan Zheng <zhengchuan@huawei.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "hw/core/cpu.h"
#include "qapi/error.h"
#include "exec/ramblock.h"
#include "exec/target_page.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
#include "ram.h"
#include "trace.h"
#include "dirtyrate.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qobject/qdict.h"
#include "system/kvm.h"
#include "system/runstate.h"
#include "exec/memory.h"
#include "qemu/xxhash.h"
#include "migration.h"

/*
 * total_dirty_pages is protected by the BQL and is used
 * to count dirty pages between two calls of
 * memory_global_dirty_log_sync().
 */
uint64_t total_dirty_pages;

typedef struct DirtyPageRecord {
    uint64_t start_pages;
    uint64_t end_pages;
} DirtyPageRecord;

static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
static struct DirtyRateStat DirtyStat;
static DirtyRateMeasureMode dirtyrate_mode =
                DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;

static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
{
    int64_t current_time;

    current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    if ((current_time - initial_time) >= msec) {
        msec = current_time - initial_time;
    } else {
        g_usleep((msec + initial_time - current_time) * 1000);
        /* g_usleep may overshoot */
        msec = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - initial_time;
    }

    return msec;
}

static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
                                     CPUState *cpu, bool start)
{
    if (start) {
        dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
    } else {
        dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
    }
}

static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
                                      int64_t calc_time_ms)
{
    uint64_t increased_dirty_pages =
        dirty_pages.end_pages - dirty_pages.start_pages;

    /*
     * multiply by 1000ms/s _before_ converting down to megabytes
     * to avoid losing precision
     */
    return qemu_target_pages_to_MiB(increased_dirty_pages * 1000) /
           calc_time_ms;
}

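/*
 * Worked example (assuming a 4 KiB target page size): 2560 pages dirtied
 * over calc_time_ms = 1000 give 2560 * 1000 = 2560000 page-milliseconds,
 * which qemu_target_pages_to_MiB() converts to 10000 MiB-milliseconds;
 * dividing by 1000 ms yields 10 MiB/s. Converting to MiB before the
 * multiplication would truncate any remainder below 256 pages (1 MiB).
 */
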
void global_dirty_log_change(unsigned int flag, bool start)
{
    Error *local_err = NULL;
    bool ret;

    bql_lock();
    if (start) {
        ret = memory_global_dirty_log_start(flag, &local_err);
        if (!ret) {
            error_report_err(local_err);
        }
    } else {
        memory_global_dirty_log_stop(flag);
    }
    bql_unlock();
}

/*
 * global_dirty_log_sync
 * 1. sync dirty log from kvm
 * 2. stop dirty tracking if needed.
 */
static void global_dirty_log_sync(unsigned int flag, bool one_shot)
{
    bql_lock();
    memory_global_dirty_log_sync(false);
    if (one_shot) {
        memory_global_dirty_log_stop(flag);
    }
    bql_unlock();
}

static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
{
    CPUState *cpu;
    int nvcpu = 0;

    CPU_FOREACH(cpu) {
        nvcpu++;
    }

    stat->nvcpu = nvcpu;
    stat->rates = g_new0(DirtyRateVcpu, nvcpu);

    return g_new0(DirtyPageRecord, nvcpu);
}

static void vcpu_dirty_stat_collect(DirtyPageRecord *records,
                                    bool start)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        record_dirtypages(records, cpu, start);
    }
}

int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
                                 VcpuStat *stat,
                                 unsigned int flag,
                                 bool one_shot)
{
    DirtyPageRecord *records = NULL;
    int64_t init_time_ms;
    int64_t duration;
    int64_t dirtyrate;
    int i = 0;
    unsigned int gen_id = 0;

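    /*
     * Read the per-vCPU dirty-page counters twice, calc_time_ms apart,
     * under the CPU list lock. The generation ID detects vCPU hotplug
     * or unplug while we slept; if it changed, the records no longer
     * match the vCPU list and the measurement restarts from scratch.
     */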
retry:
    init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
        gen_id = cpu_list_generation_id_get();
        records = vcpu_dirty_stat_alloc(stat);
        vcpu_dirty_stat_collect(records, true);
    }

    duration = dirty_stat_wait(calc_time_ms, init_time_ms);

    global_dirty_log_sync(flag, one_shot);

    WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
        if (gen_id != cpu_list_generation_id_get()) {
            g_free(records);
            g_free(stat->rates);
            cpu_list_unlock();
            goto retry;
        }
        vcpu_dirty_stat_collect(records, false);
    }

    for (i = 0; i < stat->nvcpu; i++) {
        dirtyrate = do_calculate_dirtyrate(records[i], duration);

        stat->rates[i].id = i;
        stat->rates[i].dirty_rate = dirtyrate;

        trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
    }

    g_free(records);

    return duration;
}

static bool is_calc_time_valid(int64_t msec)
{
    if ((msec < MIN_CALC_TIME_MS) || (msec > MAX_CALC_TIME_MS)) {
        return false;
    }

    return true;
}

static bool is_sample_pages_valid(int64_t pages)
{
    return pages >= MIN_SAMPLE_PAGE_COUNT &&
           pages <= MAX_SAMPLE_PAGE_COUNT;
}

static int dirtyrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < DIRTY_RATE_STATUS__MAX);
    trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
    if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
        return 0;
    } else {
        return -1;
    }
}

/* Decimal power of given time unit relative to one second */
static int time_unit_to_power(TimeUnit time_unit)
{
    switch (time_unit) {
    case TIME_UNIT_SECOND:
        return 0;
    case TIME_UNIT_MILLISECOND:
        return -3;
    default:
        g_assert_not_reached();
    }
}

static int64_t convert_time_unit(int64_t value, TimeUnit unit_from,
                                 TimeUnit unit_to)
{
    int power = time_unit_to_power(unit_from) -
                time_unit_to_power(unit_to);
    while (power < 0) {
        value /= 10;
        power += 1;
    }
    while (power > 0) {
        value *= 10;
        power -= 1;
    }
    return value;
}

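/*
 * For example: converting seconds to milliseconds gives
 * power = 0 - (-3) = 3, so convert_time_unit(2, TIME_UNIT_SECOND,
 * TIME_UNIT_MILLISECOND) == 2000; the reverse direction divides, so
 * convert_time_unit(1500, TIME_UNIT_MILLISECOND, TIME_UNIT_SECOND)
 * truncates to 1.
 */
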
static struct DirtyRateInfo *
query_dirty_rate_info(TimeUnit calc_time_unit)
{
    int i;
    int64_t dirty_rate = DirtyStat.dirty_rate;
    struct DirtyRateInfo *info = g_new0(DirtyRateInfo, 1);
    DirtyRateVcpuList *head = NULL, **tail = &head;

    info->status = CalculatingState;
    info->start_time = DirtyStat.start_time;
    info->calc_time = convert_time_unit(DirtyStat.calc_time_ms,
                                        TIME_UNIT_MILLISECOND,
                                        calc_time_unit);
    info->calc_time_unit = calc_time_unit;
    info->sample_pages = DirtyStat.sample_pages;
    info->mode = dirtyrate_mode;

    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
        info->has_dirty_rate = true;
        info->dirty_rate = dirty_rate;

        if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
            /*
             * Set sample_pages to 0 to indicate that page sampling
             * isn't enabled.
             */
            info->sample_pages = 0;
            info->has_vcpu_dirty_rate = true;
            for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
                DirtyRateVcpu *rate = g_new0(DirtyRateVcpu, 1);
                rate->id = DirtyStat.dirty_ring.rates[i].id;
                rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
                QAPI_LIST_APPEND(tail, rate);
            }
            info->vcpu_dirty_rate = head;
        }

        if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
            info->sample_pages = 0;
        }
    }

    trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));

    return info;
}

static void init_dirtyrate_stat(struct DirtyRateConfig config)
{
    DirtyStat.dirty_rate = -1;
    DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
    DirtyStat.calc_time_ms = config.calc_time_ms;
    DirtyStat.sample_pages = config.sample_pages_per_gigabytes;

    switch (config.mode) {
    case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
        DirtyStat.page_sampling.total_dirty_samples = 0;
        DirtyStat.page_sampling.total_sample_count = 0;
        DirtyStat.page_sampling.total_block_mem_MB = 0;
        break;
    case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
        DirtyStat.dirty_ring.nvcpu = -1;
        DirtyStat.dirty_ring.rates = NULL;
        break;
    default:
        break;
    }
}

static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
{
    /* The last calc-dirty-rate QMP command used dirty ring mode */
    if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
        /* rates is allocated with g_new0(), so release it with g_free() */
        g_free(DirtyStat.dirty_ring.rates);
        DirtyStat.dirty_ring.rates = NULL;
    }
}

static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
{
    DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
    DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
    /* size of total pages in MB */
    DirtyStat.page_sampling.total_block_mem_MB +=
        qemu_target_pages_to_MiB(info->ramblock_pages);
}

static void update_dirtyrate(uint64_t msec)
{
    uint64_t dirtyrate;
    uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
    uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
    uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;

    dirtyrate = total_dirty_samples * total_block_mem_MB *
                1000 / (total_sample_count * msec);

    DirtyStat.dirty_rate = dirtyrate;
}

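/*
 * The formula scales the observed dirty fraction up to the whole
 * tracked memory. For instance, with 512 samples of which 64 changed,
 * over 4096 MB of tracked RAM and msec = 1000, the rate is
 * 64 * 4096 * 1000 / (512 * 1000) = 512 MB/s, i.e. one eighth of the
 * memory dirtied per second.
 */
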
/*
 * Compute hash of a single page of size TARGET_PAGE_SIZE.
 */
static uint32_t compute_page_hash(void *ptr)
{
    size_t page_size = qemu_target_page_size();
    uint32_t i;
    uint64_t v1, v2, v3, v4;
    uint64_t res;
    const uint64_t *p = ptr;

    v1 = QEMU_XXHASH_SEED + XXH_PRIME64_1 + XXH_PRIME64_2;
    v2 = QEMU_XXHASH_SEED + XXH_PRIME64_2;
    v3 = QEMU_XXHASH_SEED + 0;
    v4 = QEMU_XXHASH_SEED - XXH_PRIME64_1;
    for (i = 0; i < page_size / 8; i += 4) {
        v1 = XXH64_round(v1, p[i + 0]);
        v2 = XXH64_round(v2, p[i + 1]);
        v3 = XXH64_round(v3, p[i + 2]);
        v4 = XXH64_round(v4, p[i + 3]);
    }
    res = XXH64_mergerounds(v1, v2, v3, v4);
    res += page_size;
    res = XXH64_avalanche(res);
    return (uint32_t)(res & UINT32_MAX);
}

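/*
 * Each loop iteration consumes four 64-bit lanes (32 bytes), so the
 * loop implicitly assumes the target page size is a multiple of 32
 * bytes; for a 4 KiB page that is 128 iterations. Only the low 32 bits
 * of the XXH64 result are kept, which is enough to detect changed
 * pages by hash mismatch.
 */
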
/*
 * Get the hash of one chunk of memory of length TARGET_PAGE_SIZE in
 * the ramblock, addressed by virtual frame number (vfn) relative to
 * the ramblock base address.
 */
static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
                                      uint64_t vfn)
{
    uint32_t hash;

    hash = compute_page_hash(info->ramblock_addr +
                             vfn * qemu_target_page_size());

    trace_get_ramblock_vfn_hash(info->idstr, vfn, hash);
    return hash;
}

static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
{
    unsigned int sample_pages_count;
    int i;
    GRand *rand;

    sample_pages_count = info->sample_pages_count;

    /* If the ramblock is smaller than one page, return success and skip it */
    if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
        return true;
    }

    info->hash_result = g_try_malloc0_n(sample_pages_count,
                                        sizeof(uint32_t));
    if (!info->hash_result) {
        return false;
    }

    info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
                                            sizeof(uint64_t));
    if (!info->sample_page_vfn) {
        g_free(info->hash_result);
        return false;
    }

    rand = g_rand_new();
    for (i = 0; i < sample_pages_count; i++) {
        info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
                                                    info->ramblock_pages - 1);
        info->hash_result[i] = get_ramblock_vfn_hash(info,
                                                     info->sample_page_vfn[i]);
    }
    g_rand_free(rand);

    return true;
}

static void get_ramblock_dirty_info(RAMBlock *block,
                                    struct RamblockDirtyInfo *info,
                                    struct DirtyRateConfig *config)
{
    uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;
    gsize len;

    /* Right shift 30 bits to calc ramblock size in GB */
    info->sample_pages_count = (qemu_ram_get_used_length(block) *
                                sample_pages_per_gigabytes) >> 30;
    /* Right shift TARGET_PAGE_BITS to calc page count */
    info->ramblock_pages = qemu_ram_get_used_length(block) >>
                           qemu_target_page_bits();
    info->ramblock_addr = qemu_ram_get_host_addr(block);
    len = g_strlcpy(info->idstr, qemu_ram_get_idstr(block),
                    sizeof(info->idstr));
    g_assert(len < sizeof(info->idstr));
}

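/*
 * For instance, a 4 GiB ramblock with the default of 512 samples per
 * GiB (DIRTYRATE_DEFAULT_SAMPLE_PAGES) gets
 * (4 * 2^30 * 512) >> 30 = 2048 sampled pages out of
 * 4 GiB >> 12 = 1048576 total pages (assuming 4 KiB target pages),
 * i.e. roughly 0.2% of the block is hashed.
 */
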
static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
{
    int i;

    if (!infos) {
        return;
    }

    for (i = 0; i < count; i++) {
        g_free(infos[i].sample_page_vfn);
        g_free(infos[i].hash_result);
    }
    g_free(infos);
}

static bool skip_sample_ramblock(RAMBlock *block)
{
    /*
     * Sample only blocks larger than MIN_RAMBLOCK_SIZE,
     * which is given in KiB (hence the shift to bytes).
     */
    if (qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10)) {
        trace_skip_sample_ramblock(block->idstr,
                                   qemu_ram_get_used_length(block));
        return true;
    }
    return false;
}

static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
                                      struct DirtyRateConfig config,
                                      int *block_count)
{
    struct RamblockDirtyInfo *info = NULL;
    struct RamblockDirtyInfo *dinfo = NULL;
    RAMBlock *block = NULL;
    int total_count = 0;
    int index = 0;
    bool ret = false;

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        if (skip_sample_ramblock(block)) {
            continue;
        }
        total_count++;
    }

    dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
    if (dinfo == NULL) {
        goto out;
    }

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        if (skip_sample_ramblock(block)) {
            continue;
        }
        if (index >= total_count) {
            break;
        }
        info = &dinfo[index];
        get_ramblock_dirty_info(block, info, &config);
        if (!save_ramblock_hash(info)) {
            goto out;
        }
        index++;
    }
    ret = true;

out:
    *block_count = index;
    *block_dinfo = dinfo;
    return ret;
}

static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
{
    uint32_t hash;
    int i;

    for (i = 0; i < info->sample_pages_count; i++) {
        hash = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
        if (hash != info->hash_result[i]) {
            trace_calc_page_dirty_rate(info->idstr, hash, info->hash_result[i]);
            info->sample_dirty_count++;
        }
    }
}

static struct RamblockDirtyInfo *
find_block_matched(RAMBlock *block, int count,
                   struct RamblockDirtyInfo *infos)
{
    int i;

    for (i = 0; i < count; i++) {
        if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
            break;
        }
    }

    if (i == count) {
        return NULL;
    }

    if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
        infos[i].ramblock_pages !=
            (qemu_ram_get_used_length(block) >> qemu_target_page_bits())) {
        trace_find_page_matched(block->idstr);
        return NULL;
    }

    return &infos[i];
}

static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
                                   int block_count)
{
    struct RamblockDirtyInfo *block_dinfo = NULL;
    RAMBlock *block = NULL;

    RAMBLOCK_FOREACH_MIGRATABLE(block) {
        if (skip_sample_ramblock(block)) {
            continue;
        }
        block_dinfo = find_block_matched(block, block_count, info);
        if (block_dinfo == NULL) {
            continue;
        }
        calc_page_dirty_rate(block_dinfo);
        update_dirtyrate_stat(block_dinfo);
    }

    if (DirtyStat.page_sampling.total_sample_count == 0) {
        return false;
    }

    return true;
}

static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
                                            bool start)
{
    if (start) {
        dirty_pages->start_pages = total_dirty_pages;
    } else {
        dirty_pages->end_pages = total_dirty_pages;
    }
}

static inline void dirtyrate_manual_reset_protect(void)
{
    RAMBlock *block = NULL;

    WITH_RCU_READ_LOCK_GUARD() {
        RAMBLOCK_FOREACH_MIGRATABLE(block) {
            memory_region_clear_dirty_bitmap(block->mr, 0,
                                             block->used_length);
        }
    }
}

static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
{
    int64_t start_time;
    DirtyPageRecord dirty_pages;
    Error *local_err = NULL;

    bql_lock();
    if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
        error_report_err(local_err);
    }

    /*
     * The first round of log sync may return all-1 bits when
     * KVM_DIRTY_LOG_INITIALLY_SET is enabled; skip it unconditionally
     * and start dirty tracking from the second round of log sync.
     */
    memory_global_dirty_log_sync(false);

    /*
     * Reset page protection manually and unconditionally. This makes
     * sure the KVM dirty log is cleared when the
     * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
     */
    dirtyrate_manual_reset_protect();
    bql_unlock();

    record_dirtypages_bitmap(&dirty_pages, true);

    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;

    DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms, start_time);

    /*
     * Do two things:
     * 1. fetch the dirty bitmap from KVM
     * 2. stop dirty tracking
     */
    global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);

    record_dirtypages_bitmap(&dirty_pages, false);

    DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages,
                                                  DirtyStat.calc_time_ms);
}

static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
{
    uint64_t dirtyrate = 0;
    uint64_t dirtyrate_sum = 0;
    int i = 0;

    /* start log sync */
    global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);

    DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;

    /* calculate vcpu dirtyrate */
    DirtyStat.calc_time_ms = vcpu_calculate_dirtyrate(config.calc_time_ms,
                                                      &DirtyStat.dirty_ring,
                                                      GLOBAL_DIRTY_DIRTY_RATE,
                                                      true);

    /* the vm dirtyrate is the sum of the per-vcpu dirtyrates */
    for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
        dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
        dirtyrate_sum += dirtyrate;
    }

    DirtyStat.dirty_rate = dirtyrate_sum;
}

static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
{
    struct RamblockDirtyInfo *block_dinfo = NULL;
    int block_count = 0;
    int64_t initial_time;

    rcu_read_lock();
    initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
    if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
        goto out;
    }
    rcu_read_unlock();

    DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms,
                                             initial_time);

    rcu_read_lock();
    if (!compare_page_hash_info(block_dinfo, block_count)) {
        goto out;
    }

    update_dirtyrate(DirtyStat.calc_time_ms);

out:
    rcu_read_unlock();
    free_ramblock_dirty_info(block_dinfo, block_count);
}

static void calculate_dirtyrate(struct DirtyRateConfig config)
{
    if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
        calculate_dirtyrate_dirty_bitmap(config);
    } else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
        calculate_dirtyrate_dirty_ring(config);
    } else {
        calculate_dirtyrate_sample_vm(config);
    }

    trace_dirtyrate_calculate(DirtyStat.dirty_rate);
}

void *get_dirtyrate_thread(void *arg)
{
    struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
    int ret;

    rcu_register_thread();

    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
                              DIRTY_RATE_STATUS_MEASURING);
    if (ret == -1) {
        error_report("change dirtyrate state failed.");
        return NULL;
    }

    calculate_dirtyrate(config);

    ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
                              DIRTY_RATE_STATUS_MEASURED);
    if (ret == -1) {
        error_report("change dirtyrate state failed.");
    }

    rcu_unregister_thread();
    return NULL;
}

void qmp_calc_dirty_rate(int64_t calc_time,
                         bool has_calc_time_unit,
                         TimeUnit calc_time_unit,
                         bool has_sample_pages,
                         int64_t sample_pages,
                         bool has_mode,
                         DirtyRateMeasureMode mode,
                         Error **errp)
{
    static struct DirtyRateConfig config;
    QemuThread thread;
    int ret;

    /*
     * If the dirty rate is already being measured, don't attempt to start.
     */
    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
        error_setg(errp, "the dirty rate is already being measured.");
        return;
    }

    int64_t calc_time_ms = convert_time_unit(
        calc_time,
        has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND,
        TIME_UNIT_MILLISECOND
    );

    if (!is_calc_time_valid(calc_time_ms)) {
        error_setg(errp, "Calculation time is out of range [%dms, %dms].",
                   MIN_CALC_TIME_MS, MAX_CALC_TIME_MS);
        return;
    }

    if (!has_mode) {
        mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
    }

    if (has_sample_pages && mode != DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
        error_setg(errp, "sample-pages is used only in page-sampling mode");
        return;
    }

    if (has_sample_pages) {
        if (!is_sample_pages_valid(sample_pages)) {
            error_setg(errp, "sample-pages is out of range[%d, %d].",
                       MIN_SAMPLE_PAGE_COUNT,
                       MAX_SAMPLE_PAGE_COUNT);
            return;
        }
    } else {
        sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
    }

    /*
     * Dirty ring mode only works when the KVM dirty ring is enabled;
     * conversely, dirty bitmap mode only works when it is disabled.
     */
    if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
        !kvm_dirty_ring_enabled()) ||
        ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
         kvm_dirty_ring_enabled())) {
        error_setg(errp, "mode %s is not enabled, use other method instead.",
                   DirtyRateMeasureMode_str(mode));
        return;
    }

    /*
     * Init calculation state as unstarted.
     */
    ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
                              DIRTY_RATE_STATUS_UNSTARTED);
    if (ret == -1) {
        error_setg(errp, "init dirty rate calculation state failed.");
        return;
    }

    config.calc_time_ms = calc_time_ms;
    config.sample_pages_per_gigabytes = sample_pages;
    config.mode = mode;

    cleanup_dirtyrate_stat(config);

    /*
     * Update the dirty rate mode so that we can figure out which mode
     * was used in the last calculation.
     */
    dirtyrate_mode = mode;

    init_dirtyrate_stat(config);

    qemu_thread_create(&thread, MIGRATION_THREAD_DIRTY_RATE,
                       get_dirtyrate_thread, (void *)&config,
                       QEMU_THREAD_DETACHED);
}

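/*
 * Illustrative QMP usage (argument names per the calc-dirty-rate and
 * query-dirty-rate definitions in qapi/migration.json):
 *
 *   { "execute": "calc-dirty-rate",
 *     "arguments": { "calc-time": 1, "mode": "page-sampling" } }
 *   { "execute": "query-dirty-rate" }
 *
 * The first command spawns the measurement thread; the second polls
 * the result once "status" reports "measured".
 */
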
struct DirtyRateInfo *qmp_query_dirty_rate(bool has_calc_time_unit,
                                           TimeUnit calc_time_unit,
                                           Error **errp)
{
    return query_dirty_rate_info(
        has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND);
}

void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
{
    DirtyRateInfo *info = query_dirty_rate_info(TIME_UNIT_SECOND);

    monitor_printf(mon, "Status: %s\n",
                   DirtyRateStatus_str(info->status));
    monitor_printf(mon, "Start Time: %"PRIi64" (ms)\n",
                   info->start_time);
    if (info->mode == DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
        monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
                       info->sample_pages);
    }
    monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
                   info->calc_time);
    monitor_printf(mon, "Mode: %s\n",
                   DirtyRateMeasureMode_str(info->mode));
    monitor_printf(mon, "Dirty rate: ");
    if (info->has_dirty_rate) {
        monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
        if (info->has_vcpu_dirty_rate) {
            DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
            for (rate = head; rate != NULL; rate = rate->next) {
                monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
                               " (MB/s)\n", rate->value->id,
                               rate->value->dirty_rate);
            }
        }
    } else {
        monitor_printf(mon, "(not ready)\n");
    }

    qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
    g_free(info);
}

void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
{
    int64_t sec = qdict_get_try_int(qdict, "second", 0);
    int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
    bool has_sample_pages = (sample_pages != -1);
    bool dirty_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
    bool dirty_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
    DirtyRateMeasureMode mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
    Error *err = NULL;

    if (!sec) {
        monitor_printf(mon, "Incorrect period length specified!\n");
        return;
    }

    if (dirty_ring && dirty_bitmap) {
        monitor_printf(mon, "Either dirty ring or dirty bitmap "
                       "can be specified!\n");
        return;
    }

    if (dirty_bitmap) {
        mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
    } else if (dirty_ring) {
        mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
    }

    qmp_calc_dirty_rate(sec, /* calc-time */
                        false, TIME_UNIT_SECOND, /* calc-time-unit */
                        has_sample_pages, sample_pages,
                        true, mode,
                        &err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
                   " seconds\n", sec);
    monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
}
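
/*
 * Sketch of an HMP session (the -r/-b mode flags are assumed from the
 * calc_dirty_rate entry in hmp-commands.hx, which maps them to the
 * "dirty_ring" and "dirty_bitmap" qdict keys read above):
 *
 *   (qemu) calc_dirty_rate 10
 *   Starting dirty rate measurement with period 10 seconds
 *   [Please use 'info dirty_rate' to check results]
 *   (qemu) info dirty_rate
 *   Status: measured
 *   ...
 */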