/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "system/xen.h"
#include "system/tcg.h"
#include "exec/cputlb.h"
#include "exec/ramlist.h"
#include "exec/ramblock.h"
#include "exec/exec-all.h"
#include "qemu/rcu.h"

#include "exec/hwaddr.h"
#include "exec/cpu-common.h"

extern uint64_t total_dirty_pages;

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}
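
/*
 * Worked example (illustrative, not part of the original header): with a
 * hypothetical section of 1048576 guest pages and clear_bmap_shift == 18,
 * each clear-bitmap bit covers 2^18 pages, so
 *
 *     clear_bmap_size(1048576, 18) == DIV_ROUND_UP(1048576, 262144) == 4
 *
 * i.e. four bits are enough to track lazy clearing for the whole section.
 */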

/**
 * clear_bmap_set: set clear bitmap for the page range.  Must be called
 * with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set(rb->clear_bmap, start >> shift, clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page; clear it if set.
 * Must be called with bitmap_mutex held.
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear(rb->clear_bmap, page >> shift, 1);
}
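
/*
 * Illustrative sketch (an assumption, not upstream code): a migration-style
 * sender could consume the covering clear-bitmap bit right before
 * transmitting a page, clearing the dirty log lazily in chunk-sized pieces.
 * The helper name is hypothetical; memory_region_clear_dirty_bitmap() and
 * QEMU_ALIGN_DOWN() are assumed to be available to the caller, and
 * bitmap_mutex is assumed to be held as required above.
 *
 *     static void lazy_clear_before_send(RAMBlock *rb, uint64_t page)
 *     {
 *         uint8_t shift = rb->clear_bmap_shift;
 *         hwaddr size, start;
 *
 *         if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) {
 *             return;
 *         }
 *         size = 1ULL << (TARGET_PAGE_BITS + shift);
 *         start = QEMU_ALIGN_DOWN((ram_addr_t)page << TARGET_PAGE_BITS, size);
 *         memory_region_clear_dirty_bitmap(rb->mr, start, size);
 *     }
 */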

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return (b && b->host && offset < b->used_length) ? true : false;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}
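
/*
 * Illustrative sketch (hypothetical caller, not part of this header):
 * translating an offset within a block into a host pointer before touching
 * guest memory directly. The assert inside ramblock_ptr() guarantees the
 * offset lies within the block's used length.
 *
 *     static void zero_guest_page(RAMBlock *block, ram_addr_t offset)
 *     {
 *         void *host = ramblock_ptr(block, offset);
 *
 *         memset(host, 0, TARGET_PAGE_SIZE);
 *     }
 */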

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd: Allocate a ram block from the specified backing
 *                         file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @max_size: the maximum size of the block after resizing
 *  @mr: the memory region where the ram block is
 *  @resized: callback after calls to qemu_ram_resize
 *  @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
 *              RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
 *              RAM_READONLY_FD, RAM_GUEST_MEMFD
 *  @mem_path or @fd: specify the backing file or device
 *  @offset: Offset into target file
 *  @grow: extend file if necessary (but an empty file is always extended).
 *  @errp: pointer to Error*, to store an error if it happens
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
typedef void (*qemu_ram_resize_cb)(const char *, uint64_t length, void *host);

RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   off_t offset, Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, ram_addr_t max_size,
                                 qemu_ram_resize_cb resized, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd, off_t offset,
                                 bool grow,
                                 Error **errp);

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    qemu_ram_resize_cb resized,
                                    MemoryRegion *mr, Error **errp);
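
/*
 * Illustrative sketch (hypothetical backend code, not upstream): allocating
 * a shared, file-backed block for a memory region. The path and flag
 * combination are assumptions for illustration only; on failure the
 * function returns NULL and fills *errp.
 *
 *     RAMBlock *rb = qemu_ram_alloc_from_file(size, mr, RAM_SHARED,
 *                                             "/dev/shm/guest-ram", 0, errp);
 *     if (!rb) {
 *         return;
 *     }
 */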

void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length);

/* Write back the whole block of mem to its backing store */
static inline void qemu_ram_block_writeback(RAMBlock *block)
{
    qemu_ram_msync(block, 0, block->used_length);
}
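
/*
 * Illustrative sketch (assumption): after modifying only part of a file- or
 * pmem-backed block, a caller may flush just that range; the offsets are
 * made up for illustration. qemu_ram_block_writeback() is the convenience
 * form covering the block's whole used_length.
 *
 *     qemu_ram_msync(block, modified_offset, modified_len);
 */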

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}
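
/*
 * Illustrative sketch (hypothetical display-side caller): checking whether
 * any page of a framebuffer range was written since the last refresh.
 * fb_start and fb_len are assumed to be the guest-physical framebuffer base
 * and length tracked by the device model.
 *
 *     if (cpu_physical_memory_get_dirty(fb_start, fb_len, DIRTY_MEMORY_VGA)) {
 *         ... redraw the affected region ...
 *     }
 */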

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx],
                                                 num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    xen_hvm_modified_memory(start, length);
}
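
/*
 * Illustrative sketch (assumption, modelled on the write path in the memory
 * core): before dirtying a range on behalf of a device write, a caller can
 * narrow the client mask to the clients that still see part of the range as
 * clean, and skip the atomic bitmap updates entirely when nothing needs
 * them. addr and len are hypothetical.
 *
 *     uint8_t mask = DIRTY_CLIENTS_ALL;
 *
 *     mask = cpu_physical_memory_range_includes_clean(addr, len, mask);
 *     ... perform the guest memory write ...
 *     if (mask) {
 *         cpu_physical_memory_set_dirty_range(addr, len, mask);
 *     }
 */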

#if !defined(_WIN32)

/*
 * Contrary to cpu_physical_memory_sync_dirty_bitmap() this function returns
 * the number of dirty pages in @bitmap passed as argument. On the other hand,
 * cpu_physical_memory_sync_dirty_bitmap() returns newly dirtied pages that
 * weren't set in the global migration bitmap.
 */
static inline
uint64_t cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                ram_addr_t start,
                                                ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c, nbits;
    hwaddr addr;
    ram_addr_t ram_addr;
    uint64_t num_dirty = 0;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] =
                    qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    nbits = ctpopl(temp);
                    qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_tracking) {
                        qatomic_or(
                                &blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                                temp);
                        if (unlikely(
                            global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                            total_dirty_pages += nbits;
                        }
                    }

                    num_dirty += nbits;

                    if (tcg_enabled()) {
                        qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                   temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_tracking) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * bitmap-traveling is faster than memory-traveling (for addr...)
         * especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                nbits = ctpopl(c);
                if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
                    total_dirty_pages += nbits;
                }
                num_dirty += nbits;
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }

    return num_dirty;
}
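
/*
 * Illustrative sketch (assumption): a dirty-log consumer that fetched a
 * little-endian bitmap covering part of a block (for example from a
 * hypervisor dirty-log ioctl) can fold it into the global dirty state in a
 * single call. le_bitmap, section_start and section_pages are hypothetical.
 *
 *     uint64_t reported;
 *
 *     reported = cpu_physical_memory_set_dirty_lebitmap(le_bitmap,
 *                                                       section_start,
 *                                                       section_pages);
 *
 * The return value counts the dirty bits in le_bitmap itself, as described
 * in the comment above.
 */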
#endif /* not _WIN32 */

static inline void cpu_physical_memory_dirty_bits_cleared(ram_addr_t start,
                                                          ram_addr_t length)
{
    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}

/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = qatomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = qatomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }
        if (num_dirty) {
            cpu_physical_memory_dirty_bits_cleared(start, length);
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point right before we
             * really send the pages; we will also split the clearing into
             * smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path - still do that in a huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
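
/*
 * Illustrative sketch (assumption): a migration-style caller syncs a whole
 * block inside the required RCU critical section and accounts the newly
 * dirtied pages; rb->bmap is updated in place by the call.
 *
 *     uint64_t new_dirty_pages;
 *
 *     WITH_RCU_READ_LOCK_GUARD() {
 *         new_dirty_pages =
 *             cpu_physical_memory_sync_dirty_bitmap(rb, 0, rb->used_length);
 *     }
 */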
#endif /* !CONFIG_USER_ONLY */
#endif /* RAM_ADDR_H */