region.c

/*
 * Memory region management for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/madvise.h"
#include "qemu/mprotect.h"
#include "qemu/memalign.h"
#include "qemu/cacheinfo.h"
#include "qemu/qtree.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg-internal.h"

struct tcg_region_tree {
    QemuMutex lock;
    QTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start_aligned;
    void *after_prologue;
    size_t n;
    size_t size;       /* size of one region */
    size_t stride;     /* .size + guard size */
    size_t total_size; /* size of entire buffer, >= n * stride */

    /* fields protected by the lock */
    size_t current;       /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;

/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
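
/* Return true if @p points into the code gen buffer, or one byte past it. */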
bool in_code_gen_buffer(const void *p)
{
    /*
     * Much like it is valid to have a pointer to the byte past the
     * end of an array (so long as you don't dereference it), allow
     * a pointer to the byte past the end of the code gen buffer.
     */
    return (size_t)(p - region.start_aligned) <= region.total_size;
}

#ifdef CONFIG_DEBUG_TCG
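/*
 * Debug-build versions of the split-wx pointer conversions: translate
 * between the writable (rw) and executable (rx) views of the code gen
 * buffer, asserting that the rw-side pointer lies within the buffer.
 */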
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}
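
/*
 * Three-way comparator for the region trees. Inserts and removals compare
 * two fully-populated tb_tc structs by start address; lookups pass a key
 * with .size == 0, which matches any block that contains the key's pointer.
 */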
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }

    /*
     * All lookups have exactly one of the .size fields set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
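
/* QTree value destroy notifier: release per-TB resources on tree reset. */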
static void tb_destroy(gpointer value)
{
    TranslationBlock *tb = value;

    qemu_spin_destroy(&tb->jmp_lock);
}
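
/*
 * Allocate one tree per region, with each tcg_region_tree padded out to
 * a data cache line so that threads working on different regions do not
 * suffer false sharing.
 */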
static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = q_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
    }
}
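
/*
 * Map a host code pointer (from either the rw or the rx mapping) to the
 * tree of the region that contains it, or NULL if it lies outside the
 * code gen buffer.
 */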
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert. The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
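
/* Record a newly translated TB in the tree of the region that holds its code. */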
void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    q_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}
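
/* Remove a TB from the tree of the region that holds its code. */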
void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    q_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = q_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}
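
/* Apply @func to every TB in every region tree, with all trees locked. */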
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        q_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}
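
/* Return the total number of TBs currently in the cache, across all regions. */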
size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += q_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}
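
/* Drop every TB from every region tree; the trees themselves are kept. */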
static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        q_tree_ref(rt->tree);
        q_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}
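
/* Compute the [start, end) host address bounds of region @curr_region. */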
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.after_prologue;
    }
    /* The final region may have a few extra pages due to earlier rounding. */
    if (curr_region == region.n - 1) {
        end = region.start_aligned + region.total_size;
    }

    *pstart = start;
    *pend = end;
}
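
/* Point TCG context @s at region @curr_region and reset its code pointers. */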
static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}
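
/*
 * Hand the next free region to @s; returns true if the whole buffer has
 * been exhausted. Called with region.lock held.
 */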
static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
    bool err = tcg_region_alloc__locked(s);
    g_assert(!err);
}

void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        tcg_region_initial_alloc__locked(s);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
{
#ifdef CONFIG_USER_ONLY
    return 1;
#else
    size_t n_regions;

    /*
     * It is likely that some vCPUs will translate more code than others,
     * so we first try to set more regions than max_cpus, with those regions
     * being of reasonable size. If that's not possible we make do by evenly
     * dividing the code_gen_buffer among the vCPUs.
     */
    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /*
     * Try to have more regions than max_cpus, with each region being >= 2 MB.
     * If we can't, then just allocate one region per vCPU thread.
     */
    n_regions = tb_size / (2 * MiB);
    if (n_regions <= max_cpus) {
        return max_cpus;
    }
    return MIN(n_regions, max_cpus * 8);
#endif
}

/*
 * Minimum size of the code gen buffer. This number is randomly chosen,
 * but not so small that we can't have a fair number of TBs live.
 *
 * Maximum size, MAX_CODE_GEN_BUFFER_SIZE, is defined in tcg-target.h.
 * Unless otherwise indicated, this is constrained by the range of
 * direct branches on the host cpu, as used by the TCG implementation
 * of goto_tb.
 */
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)

#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE \
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *buf, *end;
    size_t size;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    /* page-align the beginning and end of the buffer */
    buf = static_code_gen_buffer;
    end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size());
    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size());

    size = end - buf;

    /* Honor a command-line option limiting the size of the buffer. */
    if (size > tb_size) {
        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size());
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *buf;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (buf == NULL) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
}
#else
static int alloc_code_gen_buffer_anon(size_t size, int prot,
                                      int flags, Error **errp)
{
    void *buf;

    buf = mmap(NULL, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;
    return prot;
}

#if !defined(CONFIG_TCG_INTERPRETER) && !defined(CONFIG_TCG_THREADED_INTERPRETER)
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"

static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
    if (buf_rw == NULL) {
        goto fail;
    }

    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (buf_rx == MAP_FAILED) {
        goto fail_rx;
    }

    close(fd);
    region.start_aligned = buf_rw;
    region.total_size = size;
    tcg_splitwx_diff = buf_rx - buf_rw;

    return PROT_READ | PROT_WRITE;

 fail_rx:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 fail:
    if (buf_rx != MAP_FAILED) {
        munmap(buf_rx, size);
    }
    if (buf_rw) {
        munmap(buf_rw, size);
    }
    if (fd >= 0) {
        close(fd);
    }
    return -1;
}
#endif /* CONFIG_POSIX */

#ifdef CONFIG_DARWIN
#include <mach/mach.h>

extern kern_return_t mach_vm_remap(vm_map_t target_task,
                                   mach_vm_address_t *target_address,
                                   mach_vm_size_t size,
                                   mach_vm_offset_t mask,
                                   int flags,
                                   vm_map_t src_task,
                                   mach_vm_address_t src_address,
                                   boolean_t copy,
                                   vm_prot_t *cur_protection,
                                   vm_prot_t *max_protection,
                                   vm_inherit_t inheritance);

static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    kern_return_t ret;
    mach_vm_address_t buf_rw, buf_rx;
    vm_prot_t cur_prot, max_prot;

    /* Map the read-write portion via normal anon memory. */
    if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
        return -1;
    }

    buf_rw = (mach_vm_address_t)region.start_aligned;
    buf_rx = 0;
    ret = mach_vm_remap(mach_task_self(),
                        &buf_rx,
                        size,
                        0,
                        VM_FLAGS_ANYWHERE,
                        mach_task_self(),
                        buf_rw,
                        false,
                        &cur_prot,
                        &max_prot,
                        VM_INHERIT_NONE);
    if (ret != KERN_SUCCESS) {
        /* TODO: Convert "ret" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)buf_rw, size);
        return -1;
    }

    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)buf_rx, size);
        munmap((void *)buf_rw, size);
        return -1;
    }

    tcg_splitwx_diff = buf_rx - buf_rw;
    return PROT_READ | PROT_WRITE;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */
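
/*
 * Allocate a split-wx buffer with whichever mechanism the host supports:
 * vm_remap on Darwin, otherwise a dual-mapped memfd on POSIX hosts.
 */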
static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#if !defined(CONFIG_TCG_INTERPRETER) && !defined(CONFIG_TCG_THREADED_INTERPRETER)
# ifdef CONFIG_DARWIN
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
    error_setg(errp, "jit split-wx not supported");
    return -1;
}

static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        prot = alloc_code_gen_buffer_splitwx(size, errp);
        if (prot >= 0) {
            return prot;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return -1;
        }
        error_free_or_abort(errp);
    }

    /*
     * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
     * rejects a permission change from RWX -> NONE when reserving the
     * guard pages later. We can go the other way with the same number
     * of syscalls, so always begin with PROT_NONE.
     */
    prot = PROT_NONE;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(CONFIG_TCG_INTERPRETER) || defined(CONFIG_TCG_THREADED_INTERPRETER)
    /* The tcg interpreter does not need execute permission. */
    prot = PROT_READ | PROT_WRITE;
#elif defined(CONFIG_DARWIN)
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (!splitwx) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    const size_t page_size = qemu_real_host_page_size();
    size_t region_size;
    int have_prot, need_prot;

    /* Size the buffer. */
    if (tb_size == 0) {
        size_t phys_mem = qemu_get_host_physmem();
        if (phys_mem == 0) {
            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
        } else {
            tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
        }
    }
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
    }

    have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
    assert(have_prot >= 0);

    /* Request large pages for the buffer and the splitwx. */
    qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
    if (tcg_splitwx_diff) {
        qemu_madvise(region.start_aligned + tcg_splitwx_diff,
                     region.total_size, QEMU_MADV_HUGEPAGE);
    }

    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region.n = tcg_n_regions(tb_size, max_cpus);
    region_size = tb_size / region.n;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);
    region.stride = region_size;

    /* Reserve space for guard pages. */
    region.size = region_size - page_size;
    region.total_size -= page_size;

    /*
     * The first region will be smaller than the others, via the prologue,
     * which has yet to be allocated. For now, the first region begins at
     * the page boundary.
     */
    region.after_prologue = region.start_aligned;

    /* init the region struct */
    qemu_mutex_init(&region.lock);

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur. Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     * Work with the page protections set up with the initial mapping.
     */
    need_prot = PAGE_READ | PAGE_WRITE;
#if !defined(CONFIG_TCG_INTERPRETER) && !defined(CONFIG_TCG_THREADED_INTERPRETER)
    if (tcg_splitwx_diff == 0) {
        need_prot |= PAGE_EXEC;
    }
#endif
    for (size_t i = 0, n = region.n; i < n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);
        if (have_prot != need_prot) {
            int rc;

            if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
                rc = qemu_mprotect_rwx(start, end - start);
            } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
                rc = qemu_mprotect_rw(start, end - start);
            } else {
                g_assert_not_reached();
            }
            if (rc) {
                error_setg_errno(&error_fatal, errno,
                                 "mprotect of jit buffer");
            }
        }
        if (have_prot != 0) {
            /* Guard pages are nice for bug detection but are not essential. */
            (void)qemu_mprotect_none(end, page_size);
        }
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}

void tcg_region_prologue_set(TCGContext *s)
{
    /* Deduct the prologue from the first region. */
    g_assert(region.start_aligned == s->code_gen_buffer);
    region.after_prologue = s->code_ptr;

    /* Recompute boundaries of the first region. */
    tcg_region_assign(s, 0);

    /* Register the balance of the buffer with gdb. */
    tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
                     region.start_aligned + region.total_size -
                     region.after_prologue);
}

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.total_size;
    capacity -= (region.n - 1) * guard_size;
    capacity -= region.n * TCG_HIGHWATER;

    return capacity;
}