cpu-exec.c

/*
 * emulator main execution loop
 *
 * Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/qemu-print.h"
#include "qapi/error.h"
#include "qapi/type-helpers.h"
#include "hw/core/tcg-cpu-ops.h"
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "qemu/atomic.h"
#include "qemu/rcu.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
#include "sysemu/cpus.h"
#include "exec/cpu-all.h"
#include "sysemu/cpu-timers.h"
#include "exec/replay-core.h"
#include "sysemu/tcg.h"
#include "exec/helper-proto-common.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal-common.h"
#include "internal-target.h"

/* -icount align implementation. */

typedef struct SyncClocks {
    int64_t diff_clk;
    int64_t last_cpu_icount;
    int64_t realtime_clock;
} SyncClocks;

#if !defined(CONFIG_USER_ONLY)
/* Allow the guest to have a max 3ms advance.
 * The difference between the 2 clocks could therefore
 * oscillate around 0.
 */
#define VM_CLOCK_ADVANCE 3000000
#define THRESHOLD_REDUCE 1.5
#define MAX_DELAY_PRINT_RATE 2000000000LL
#define MAX_NB_PRINTS 100

int64_t max_delay;
int64_t max_advance;
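
/*
 * align_clocks: with -icount align, keep the guest's instruction-count
 * clock from running ahead of host real time; if the guest has gained
 * more than VM_CLOCK_ADVANCE, sleep on the host until the two clocks
 * line up again.
 */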
static void align_clocks(SyncClocks *sc, CPUState *cpu)
{
    int64_t cpu_icount;

    if (!icount_align_option) {
        return;
    }

    cpu_icount = cpu->icount_extra + cpu->neg.icount_decr.u16.low;
    sc->diff_clk += icount_to_ns(sc->last_cpu_icount - cpu_icount);
    sc->last_cpu_icount = cpu_icount;

    if (sc->diff_clk > VM_CLOCK_ADVANCE) {
#ifndef _WIN32
        struct timespec sleep_delay, rem_delay;
        sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
        sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
        if (nanosleep(&sleep_delay, &rem_delay) < 0) {
            sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
        } else {
            sc->diff_clk = 0;
        }
#else
        Sleep(sc->diff_clk / SCALE_MS);
        sc->diff_clk = 0;
#endif
    }
}
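
/*
 * print_delay: warn, at most every MAX_DELAY_PRINT_RATE ns and at most
 * MAX_NB_PRINTS times in total, when the guest has fallen behind host
 * real time under -icount align.
 */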
static void print_delay(const SyncClocks *sc)
{
    static float threshold_delay;
    static int64_t last_realtime_clock;
    static int nb_prints;

    if (icount_align_option &&
        sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
        nb_prints < MAX_NB_PRINTS) {
        if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
            (-sc->diff_clk / (float)1000000000LL <
             (threshold_delay - THRESHOLD_REDUCE))) {
            threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
            qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
                        threshold_delay - 1,
                        threshold_delay);
            nb_prints++;
            last_realtime_clock = sc->realtime_clock;
        }
    }
}
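
/*
 * init_delay_params: record the starting difference between the virtual
 * (guest) clock and host real time, update the max_delay/max_advance
 * statistics, and report a late guest if needed.
 */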
static void init_delay_params(SyncClocks *sc, CPUState *cpu)
{
    if (!icount_align_option) {
        return;
    }
    sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
    sc->last_cpu_icount
        = cpu->icount_extra + cpu->neg.icount_decr.u16.low;
    if (sc->diff_clk < max_delay) {
        max_delay = sc->diff_clk;
    }
    if (sc->diff_clk > max_advance) {
        max_advance = sc->diff_clk;
    }

    /* Print at most every 2s if the guest is late.  We limit the number
       of printed messages to MAX_NB_PRINTS (currently 100). */
    print_delay(sc);
}
#else
static void align_clocks(SyncClocks *sc, const CPUState *cpu)
{
}

static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
{
}
#endif /* !CONFIG_USER_ONLY */

uint32_t curr_cflags(CPUState *cpu)
{
    uint32_t cflags = cpu->tcg_cflags;

    /*
     * Record gdb single-step.  We should be exiting the TB by raising
     * EXCP_DEBUG, but to simplify other tests, disable chaining too.
     *
     * For singlestep and -d nochain, suppress goto_tb so that
     * we can log -d cpu,exec after every TB.
     */
    if (unlikely(cpu->singlestep_enabled)) {
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
    } else if (qatomic_read(&one_insn_per_tb)) {
        cflags |= CF_NO_GOTO_TB | 1;
    } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
        cflags |= CF_NO_GOTO_TB;
    }

    return cflags;
}

struct tb_desc {
    vaddr pc;
    uint64_t cs_base;
    CPUArchState *env;
    tb_page_addr_t page_addr0;
    uint32_t flags;
    uint32_t cflags;
};
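
/*
 * tb_lookup_cmp: qht comparison callback used by tb_htable_lookup().
 * A candidate TB matches when pc (ignored for CF_PCREL), cs_base, flags,
 * cflags and the physical address of the first page all agree; for a TB
 * that spans two pages, the physical address of the second page is looked
 * up and compared as well.
 */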
static bool tb_lookup_cmp(const void *p, const void *d)
{
    const TranslationBlock *tb = p;
    const struct tb_desc *desc = d;

    if ((tb_cflags(tb) & CF_PCREL || tb->pc == desc->pc) &&
        tb_page_addr0(tb) == desc->page_addr0 &&
        tb->cs_base == desc->cs_base &&
        tb->flags == desc->flags &&
        tb_cflags(tb) == desc->cflags) {
        /* check next page if needed */
        tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
        if (tb_phys_page1 == -1) {
            return true;
        } else {
            tb_page_addr_t phys_page1;
            vaddr virt_page1;

            /*
             * We know that the first page matched, and an otherwise valid TB
             * encountered an incomplete instruction at the end of that page,
             * therefore we know that generating a new TB from the current PC
             * must also require reading from the next page -- even if the
             * second pages do not match, and therefore the resulting insn
             * is different for the new TB.  Therefore any exception raised
             * here by the faulting lookup is not premature.
             */
            virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
            phys_page1 = get_page_addr_code(desc->env, virt_page1);
            if (tb_phys_page1 == phys_page1) {
                return true;
            }
        }
    }
    return false;
}
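
/*
 * tb_htable_lookup: look up a translation block in the global qht hash
 * table.  Returns NULL if the guest pc cannot be mapped to a RAM page or
 * if no matching TB has been generated yet.
 */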
static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
                                          uint64_t cs_base, uint32_t flags,
                                          uint32_t cflags)
{
    tb_page_addr_t phys_pc;
    struct tb_desc desc;
    uint32_t h;

    desc.env = cpu_env(cpu);
    desc.cs_base = cs_base;
    desc.flags = flags;
    desc.cflags = cflags;
    desc.pc = pc;
    phys_pc = get_page_addr_code(desc.env, pc);
    if (phys_pc == -1) {
        return NULL;
    }
    desc.page_addr0 = phys_pc;
    h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
                     flags, cs_base, cflags);
    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}

/* Might cause an exception, so have a longjmp destination ready */
static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
                                          uint64_t cs_base, uint32_t flags,
                                          uint32_t cflags)
{
    TranslationBlock *tb;
    CPUJumpCache *jc;
    uint32_t hash;

    /* we should never be trying to look up an INVALID tb */
    tcg_debug_assert(!(cflags & CF_INVALID));

    hash = tb_jmp_cache_hash_func(pc);
    jc = cpu->tb_jmp_cache;

    if (cflags & CF_PCREL) {
        /* Use acquire to ensure current load of pc from jc. */
        tb = qatomic_load_acquire(&jc->array[hash].tb);
        if (likely(tb &&
                   jc->array[hash].pc == pc &&
                   tb->cs_base == cs_base &&
                   tb->flags == flags &&
                   tb_cflags(tb) == cflags)) {
            return tb;
        }
        tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
        if (tb == NULL) {
            return NULL;
        }
        jc->array[hash].pc = pc;
        /* Ensure pc is written first. */
        qatomic_store_release(&jc->array[hash].tb, tb);
    } else {
        /* Use rcu_read to ensure current load of pc from *tb. */
        tb = qatomic_rcu_read(&jc->array[hash].tb);
        if (likely(tb &&
                   tb->pc == pc &&
                   tb->cs_base == cs_base &&
                   tb->flags == flags &&
                   tb_cflags(tb) == cflags)) {
            return tb;
        }
        tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
        if (tb == NULL) {
            return NULL;
        }
        /* Use the pc value already stored in tb->pc. */
        qatomic_set(&jc->array[hash].tb, tb);
    }

    return tb;
}
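
/*
 * log_cpu_exec: emit the "Trace ..." line for -d exec and, when -d cpu
 * is also enabled, dump the CPU register state before the TB runs.
 */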
static void log_cpu_exec(vaddr pc, CPUState *cpu,
                         const TranslationBlock *tb)
{
    if (qemu_log_in_addr_range(pc)) {
        qemu_log_mask(CPU_LOG_EXEC,
                      "Trace %d: %p [%08" PRIx64
                      "/%016" VADDR_PRIx "/%08x/%08x] %s\n",
                      cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
                      tb->flags, tb->cflags, lookup_symbol(pc));

        if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                int flags = 0;

                if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
                    flags |= CPU_DUMP_FPU;
                }
#if defined(TARGET_I386)
                flags |= CPU_DUMP_CCOP;
#endif
                if (qemu_loglevel_mask(CPU_LOG_TB_VPU)) {
                    flags |= CPU_DUMP_VPU;
                }
                cpu_dump_state(cpu, logfile, flags);
                qemu_log_unlock(logfile);
            }
        }
    }
}
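
/*
 * check_for_breakpoints_slow: return true, with EXCP_DEBUG pending, when
 * a gdb or guest breakpoint fires exactly at @pc.  A breakpoint elsewhere
 * in the same page only adjusts @cflags so that single-instruction TBs
 * are used until the breakpoint address itself is reached.
 */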
static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
                                       uint32_t *cflags)
{
    CPUBreakpoint *bp;
    bool match_page = false;

    /*
     * Singlestep overrides breakpoints.
     * This requirement is visible in the record-replay tests, where
     * we would fail to make forward progress in reverse-continue.
     *
     * TODO: gdb singlestep should only override gdb breakpoints,
     * so that one could (gdb) singlestep into the guest kernel's
     * architectural breakpoint handler.
     */
    if (cpu->singlestep_enabled) {
        return false;
    }

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        /*
         * If we have an exact pc match, trigger the breakpoint.
         * Otherwise, note matches within the page.
         */
        if (pc == bp->pc) {
            bool match_bp = false;

            if (bp->flags & BP_GDB) {
                match_bp = true;
            } else if (bp->flags & BP_CPU) {
#ifdef CONFIG_USER_ONLY
                g_assert_not_reached();
#else
                CPUClass *cc = CPU_GET_CLASS(cpu);
                assert(cc->tcg_ops->debug_check_breakpoint);
                match_bp = cc->tcg_ops->debug_check_breakpoint(cpu);
#endif
            }

            if (match_bp) {
                cpu->exception_index = EXCP_DEBUG;
                return true;
            }
        } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
            match_page = true;
        }
    }

    /*
     * Within the same page as a breakpoint, single-step,
     * returning to helper_lookup_tb_ptr after each insn looking
     * for the actual breakpoint.
     *
     * TODO: Perhaps better to record all of the TBs associated
     * with a given virtual page that contains a breakpoint, and
     * then invalidate them when a new overlapping breakpoint is
     * set on the page.  Non-overlapping TBs would not be
     * invalidated, nor would any TB need to be invalidated as
     * breakpoints are removed.
     */
    if (match_page) {
        *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
    }
    return false;
}

static inline bool check_for_breakpoints(CPUState *cpu, vaddr pc,
                                         uint32_t *cflags)
{
    return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
        check_for_breakpoints_slow(cpu, pc, cflags);
}

/**
 * helper_lookup_tb_ptr: quick check for next tb
 * @env: current cpu state
 *
 * Look for an existing TB matching the current cpu state.
 * If found, return the code pointer.  If not found, return
 * the tcg epilogue so that we return into cpu_tb_exec.
 */
const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
{
    CPUState *cpu = env_cpu(env);
    TranslationBlock *tb;
    vaddr pc;
    uint64_t cs_base;
    uint32_t flags, cflags;

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

    cflags = curr_cflags(cpu);
    if (check_for_breakpoints(cpu, pc, &cflags)) {
        cpu_loop_exit(cpu);
    }

    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return tcg_code_gen_epilogue;
    }

    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
        log_cpu_exec(pc, cpu, tb);
    }

    return tb->tc.ptr;
}

/* Execute a TB, and fix up the CPU state afterwards if necessary */
/*
 * Disable CFI checks.
 * TCG creates binary blobs at runtime, with the transformed code.
 * A TB is a blob of binary code, created at runtime and called with an
 * indirect function call.  Since such a function did not exist at compile
 * time, the CFI runtime has no way to verify its signature and would fail.
 * TCG is not considered a security-sensitive part of QEMU, so this does not
 * affect the impact of CFI in environments with high security requirements.
 */
static inline TranslationBlock * QEMU_DISABLE_CFI
cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
{
    CPUArchState *env = cpu_env(cpu);
    uintptr_t ret;
    TranslationBlock *last_tb;
    const void *tb_ptr = itb->tc.ptr;

    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
        log_cpu_exec(log_pc(cpu, itb), cpu, itb);
    }

    qemu_thread_jit_execute();
    ret = tcg_qemu_tb_exec(env, tb_ptr);
    cpu->neg.can_do_io = true;
    qemu_plugin_disable_mem_helpers(cpu);
    /*
     * TODO: Delay swapping back to the read-write region of the TB
     * until we actually need to modify the TB.  The read-only copy,
     * coming from the rx region, shares the same host TLB entry as
     * the code that executed the exit_tb opcode that arrived here.
     * If we insist on touching both the RX and the RW pages, we
     * double the host TLB pressure.
     */
    last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
    *tb_exit = ret & TB_EXIT_MASK;

    trace_exec_tb_exit(last_tb, *tb_exit);

    if (*tb_exit > TB_EXIT_IDX1) {
        /* We didn't start executing this TB (eg because the instruction
         * counter hit zero); we must restore the guest PC to the address
         * of the start of the TB.
         */
        CPUClass *cc = CPU_GET_CLASS(cpu);

        if (cc->tcg_ops->synchronize_from_tb) {
            cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
        } else {
            tcg_debug_assert(!(tb_cflags(last_tb) & CF_PCREL));
            assert(cc->set_pc);
            cc->set_pc(cpu, last_tb->pc);
        }
        if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
            vaddr pc = log_pc(cpu, last_tb);
            if (qemu_log_in_addr_range(pc)) {
                qemu_log("Stopped execution of TB chain before %p [%016"
                         VADDR_PRIx "] %s\n",
                         last_tb->tc.ptr, pc, lookup_symbol(pc));
            }
        }
    }

    /*
     * If gdb single-step, and we haven't raised another exception,
     * raise a debug exception.  Single-step with another exception
     * is handled in cpu_handle_exception.
     */
    if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
        cpu->exception_index = EXCP_DEBUG;
        cpu_loop_exit(cpu);
    }

    return last_tb;
}

static void cpu_exec_enter(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_enter) {
        cc->tcg_ops->cpu_exec_enter(cpu);
    }
}

static void cpu_exec_exit(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (cc->tcg_ops->cpu_exec_exit) {
        cc->tcg_ops->cpu_exec_exit(cpu);
    }
}

static void cpu_exec_longjmp_cleanup(CPUState *cpu)
{
    /* Non-buggy compilers preserve this; assert the correct value. */
    g_assert(cpu == current_cpu);

#ifdef CONFIG_USER_ONLY
    clear_helper_retaddr();
    if (have_mmap_lock()) {
        mmap_unlock();
    }
#else
    /*
     * For softmmu, a tlb_fill fault during translation will land here,
     * and we need to release any page locks held.  In system mode we
     * have one tcg_ctx per thread, so we know it was this cpu doing
     * the translation.
     *
     * Alternative 1: Install a cleanup to be called via an exception
     * handling safe longjmp.  It seems plausible that all our hosts
     * support such a thing.  We'd have to properly register unwind info
     * for the JIT for EH, rather than just for GDB.
     *
     * Alternative 2: Set and restore cpu->jmp_env in tb_gen_code to
     * capture the cpu_loop_exit longjmp, perform the cleanup, and
     * jump again to arrive here.
     */
    if (tcg_ctx->gen_tb) {
        tb_unlock_pages(tcg_ctx->gen_tb);
        tcg_ctx->gen_tb = NULL;
    }
#endif
    if (qemu_mutex_iothread_locked()) {
        qemu_mutex_unlock_iothread();
    }
    assert_no_pages_locked();
}
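
/*
 * cpu_exec_step_atomic: translate and execute exactly one guest
 * instruction while holding the exclusive lock, so that an atomic
 * operation which cannot be handled in parallel context runs with no
 * other vCPU executing.
 */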
void cpu_exec_step_atomic(CPUState *cpu)
{
    CPUArchState *env = cpu_env(cpu);
    TranslationBlock *tb;
    vaddr pc;
    uint64_t cs_base;
    uint32_t flags, cflags;
    int tb_exit;

    if (sigsetjmp(cpu->jmp_env, 0) == 0) {
        start_exclusive();
        g_assert(cpu == current_cpu);
        g_assert(!cpu->running);
        cpu->running = true;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

        cflags = curr_cflags(cpu);
        /* Execute in a serial context. */
        cflags &= ~CF_PARALLEL;
        /* After 1 insn, return and release the exclusive lock. */
        cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
        /*
         * No need to check_for_breakpoints here.
         * We only arrive in cpu_exec_step_atomic after beginning execution
         * of an insn that includes an atomic operation we can't handle.
         * Any breakpoint for this insn will have been recognized earlier.
         */

        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
        if (tb == NULL) {
            mmap_lock();
            tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
            mmap_unlock();
        }

        cpu_exec_enter(cpu);
        /* execute the generated code */
        trace_exec_tb(tb, pc);
        cpu_tb_exec(cpu, tb, &tb_exit);
        cpu_exec_exit(cpu);
    } else {
        cpu_exec_longjmp_cleanup(cpu);
    }

    /*
     * As we start the exclusive region before codegen we must still
     * be in the region if we longjump out of either the codegen or
     * the execution.
     */
    g_assert(cpu_in_exclusive_context(cpu));
    cpu->running = false;
    end_exclusive();
}

void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
{
    /*
     * Get the rx view of the structure, from which we find the
     * executable code address, and tb_target_set_jmp_target can
     * produce a pc-relative displacement to jmp_target_addr[n].
     */
    const TranslationBlock *c_tb = tcg_splitwx_to_rx(tb);
    uintptr_t offset = tb->jmp_insn_offset[n];
    uintptr_t jmp_rx = (uintptr_t)tb->tc.ptr + offset;
    uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;

    tb->jmp_target_addr[n] = addr;
    tb_target_set_jmp_target(c_tb, n, jmp_rx, jmp_rw);
}
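
/*
 * tb_add_jump: chain exit @n of @tb directly to @tb_next by patching the
 * generated jump instruction, so that execution can flow from one TB to
 * the next without returning to the main loop.  The link is only made if
 * the slot is still empty and @tb_next has not been invalidated.
 */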
static inline void tb_add_jump(TranslationBlock *tb, int n,
                               TranslationBlock *tb_next)
{
    uintptr_t old;

    qemu_thread_jit_write();
    assert(n < ARRAY_SIZE(tb->jmp_list_next));
    qemu_spin_lock(&tb_next->jmp_lock);

    /* make sure the destination TB is valid */
    if (tb_next->cflags & CF_INVALID) {
        goto out_unlock_next;
    }
    /* Atomically claim the jump destination slot only if it was NULL */
    old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
                          (uintptr_t)tb_next);
    if (old) {
        goto out_unlock_next;
    }

    /* patch the native jump address */
    tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);

    /* add in TB jmp list */
    tb->jmp_list_next[n] = tb_next->jmp_list_head;
    tb_next->jmp_list_head = (uintptr_t)tb | n;

    qemu_spin_unlock(&tb_next->jmp_lock);

    qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
                  tb->tc.ptr, n, tb_next->tc.ptr);
    return;

 out_unlock_next:
    qemu_spin_unlock(&tb_next->jmp_lock);
    return;
}
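
/*
 * cpu_handle_halt: return true when the CPU is halted and has no work
 * pending, in which case cpu_exec() bails out with EXCP_HALTED.  On x86,
 * a pending APIC poll is serviced first so that it can wake the CPU.
 */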
static inline bool cpu_handle_halt(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    if (cpu->halted) {
#if defined(TARGET_I386)
        if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            qemu_mutex_lock_iothread();
            apic_poll_irq(x86_cpu->apic_state);
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
            qemu_mutex_unlock_iothread();
        }
#endif /* TARGET_I386 */
        if (!cpu_has_work(cpu)) {
            return true;
        }

        cpu->halted = 0;
    }
#endif /* !CONFIG_USER_ONLY */

    return false;
}

static inline void cpu_handle_debug_exception(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUWatchpoint *wp;

    if (!cpu->watchpoint_hit) {
        QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }

    if (cc->tcg_ops->debug_excp_handler) {
        cc->tcg_ops->debug_excp_handler(cpu);
    }
}
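
/*
 * cpu_handle_exception: dispatch a pending exception, if there is one.
 * Returns true when the outer loop should stop and hand *ret back to the
 * caller of cpu_exec(); returns false when no exception is pending, or
 * the exception was delivered to the guest and execution can continue.
 */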
static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
    if (cpu->exception_index < 0) {
#ifndef CONFIG_USER_ONLY
        if (replay_has_exception()
            && cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0) {
            /* Execute just one insn to trigger exception pending in the log */
            cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
                                  | CF_NOIRQ | 1;
        }
#endif
        return false;
    }

    if (cpu->exception_index >= EXCP_INTERRUPT) {
        /* exit request from the cpu execution loop */
        *ret = cpu->exception_index;
        if (*ret == EXCP_DEBUG) {
            cpu_handle_debug_exception(cpu);
        }
        cpu->exception_index = -1;
        return true;
    } else {
#if defined(CONFIG_USER_ONLY)
        /* if user mode only, we simulate a fake exception
           which will be handled outside the cpu execution
           loop */
#if defined(TARGET_I386)
        CPUClass *cc = CPU_GET_CLASS(cpu);
        cc->tcg_ops->fake_user_interrupt(cpu);
#endif /* TARGET_I386 */
        *ret = cpu->exception_index;
        cpu->exception_index = -1;
        return true;
#else
        if (replay_exception()) {
            CPUClass *cc = CPU_GET_CLASS(cpu);
            qemu_mutex_lock_iothread();
            cc->tcg_ops->do_interrupt(cpu);
            qemu_mutex_unlock_iothread();
            cpu->exception_index = -1;

            if (unlikely(cpu->singlestep_enabled)) {
                /*
                 * After processing the exception, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                *ret = EXCP_DEBUG;
                cpu_handle_debug_exception(cpu);
                return true;
            }
        } else if (!replay_has_interrupt()) {
            /* give a chance to iothread in replay mode */
            *ret = EXCP_INTERRUPT;
            return true;
        }
#endif
    }

    return false;
}

#ifndef CONFIG_USER_ONLY
/*
 * CPU_INTERRUPT_POLL is a virtual event which gets converted into a
 * "real" interrupt event later.  It does not need to be recorded for
 * replay purposes.
 */
static inline bool need_replay_interrupt(int interrupt_request)
{
#if defined(TARGET_I386)
    return !(interrupt_request & CPU_INTERRUPT_POLL);
#else
    return true;
#endif
}
#endif /* !CONFIG_USER_ONLY */
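
/*
 * cpu_handle_interrupt: process pending interrupt and exit requests.
 * Returns true when the inner TB-execution loop must stop so that the
 * exception_index set here can be reported; returns false to keep
 * executing TBs.  *last_tb is cleared whenever TB chaining must not be
 * carried across the event.
 */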
static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
{
    /*
     * If we have requested custom cflags with CF_NOIRQ we should
     * skip checking here.  Any pending interrupts will get picked up
     * by the next TB we execute under normal cflags.
     */
    if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
        return false;
    }

    /* Clear the interrupt flag now since we're processing
     * cpu->interrupt_request and cpu->exit_request.
     * Ensure zeroing happens before reading cpu->exit_request or
     * cpu->interrupt_request (see also smp_wmb in cpu_exit())
     */
    qatomic_set_mb(&cpu->neg.icount_decr.u16.high, 0);

    if (unlikely(qatomic_read(&cpu->interrupt_request))) {
        int interrupt_request;
        qemu_mutex_lock_iothread();
        interrupt_request = cpu->interrupt_request;
        if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
            /* Mask out external interrupts for this step. */
            interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
        }
        if (interrupt_request & CPU_INTERRUPT_DEBUG) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
            cpu->exception_index = EXCP_DEBUG;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if !defined(CONFIG_USER_ONLY)
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
            /* Do nothing */
        } else if (interrupt_request & CPU_INTERRUPT_HALT) {
            replay_interrupt();
            cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
            cpu->halted = 1;
            cpu->exception_index = EXCP_HLT;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if defined(TARGET_I386)
        else if (interrupt_request & CPU_INTERRUPT_INIT) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            CPUArchState *env = &x86_cpu->env;
            replay_interrupt();
            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
            do_cpu_init(x86_cpu);
            cpu->exception_index = EXCP_HALTED;
            qemu_mutex_unlock_iothread();
            return true;
        }
#else
        else if (interrupt_request & CPU_INTERRUPT_RESET) {
            replay_interrupt();
            cpu_reset(cpu);
            qemu_mutex_unlock_iothread();
            return true;
        }
#endif /* !TARGET_I386 */
        /* The target hook has 3 exit conditions:
           False when the interrupt isn't processed,
           True when it is, and we should restart on a new TB,
           and via longjmp via cpu_loop_exit.  */
        else {
            CPUClass *cc = CPU_GET_CLASS(cpu);

            if (cc->tcg_ops->cpu_exec_interrupt &&
                cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
                if (need_replay_interrupt(interrupt_request)) {
                    replay_interrupt();
                }
                /*
                 * After processing the interrupt, ensure an EXCP_DEBUG is
                 * raised when single-stepping so that GDB doesn't miss the
                 * next instruction.
                 */
                if (unlikely(cpu->singlestep_enabled)) {
                    cpu->exception_index = EXCP_DEBUG;
                    qemu_mutex_unlock_iothread();
                    return true;
                }
                cpu->exception_index = -1;
                *last_tb = NULL;
            }
            /* The target hook may have updated the 'cpu->interrupt_request';
             * reload the 'interrupt_request' value */
            interrupt_request = cpu->interrupt_request;
        }
#endif /* !CONFIG_USER_ONLY */
        if (interrupt_request & CPU_INTERRUPT_EXITTB) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
            /* ensure that no TB jump will be modified as
               the program flow was changed */
            *last_tb = NULL;
        }

        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
        qemu_mutex_unlock_iothread();
    }

    /* Finally, check if we need to exit to the main loop. */
    if (unlikely(qatomic_read(&cpu->exit_request))
        || (icount_enabled()
            && (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT)
            && cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0)) {
        qatomic_set(&cpu->exit_request, 0);
        if (cpu->exception_index == -1) {
            cpu->exception_index = EXCP_INTERRUPT;
        }
        return true;
    }

    return false;
}
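
/*
 * cpu_loop_exec_tb: run one TB and decide whether chaining may continue.
 * When the exit was requested because the icount budget expired, refill
 * the decrementer and, if fewer instructions remain than the next TB
 * contains, request a TB restricted to exactly that many instructions.
 */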
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
                                    vaddr pc, TranslationBlock **last_tb,
                                    int *tb_exit)
{
    int32_t insns_left;

    trace_exec_tb(tb, pc);
    tb = cpu_tb_exec(cpu, tb, tb_exit);
    if (*tb_exit != TB_EXIT_REQUESTED) {
        *last_tb = tb;
        return;
    }

    *last_tb = NULL;
    insns_left = qatomic_read(&cpu->neg.icount_decr.u32);
    if (insns_left < 0) {
        /* Something asked us to stop executing chained TBs; just
         * continue round the main loop. Whatever requested the exit
         * will also have set something else (eg exit_request or
         * interrupt_request) which will be handled by
         * cpu_handle_interrupt.  cpu_handle_interrupt will also
         * clear cpu->icount_decr.u16.high.
         */
        return;
    }

    /* Instruction counter expired.  */
    assert(icount_enabled());
#ifndef CONFIG_USER_ONLY
    /* Ensure global icount has gone forward */
    icount_update(cpu);
    /* Refill decrementer and continue execution. */
    insns_left = MIN(0xffff, cpu->icount_budget);
    cpu->neg.icount_decr.u16.low = insns_left;
    cpu->icount_extra = cpu->icount_budget - insns_left;

    /*
     * If the next tb has more instructions than we have left to
     * execute we need to ensure we find/generate a TB with exactly
     * insns_left instructions in it.
     */
    if (insns_left > 0 && insns_left < tb->icount) {
        assert(insns_left <= CF_COUNT_MASK);
        assert(cpu->icount_extra == 0);
        cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
    }
#endif
}

/* main execution loop */

static int __attribute__((noinline))
cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
{
    int ret;

    /* if an exception is pending, we execute it here */
    while (!cpu_handle_exception(cpu, &ret)) {
        TranslationBlock *last_tb = NULL;
        int tb_exit = 0;

        while (!cpu_handle_interrupt(cpu, &last_tb)) {
            TranslationBlock *tb;
            vaddr pc;
            uint64_t cs_base;
            uint32_t flags, cflags;

            cpu_get_tb_cpu_state(cpu_env(cpu), &pc, &cs_base, &flags);

            /*
             * When requested, use an exact setting for cflags for the next
             * execution.  This is used for icount, precise smc, and stop-
             * after-access watchpoints.  Since this request should never
             * have CF_INVALID set, -1 is a convenient invalid value that
             * does not require tcg headers for cpu_common_reset.
             */
            cflags = cpu->cflags_next_tb;
            if (cflags == -1) {
                cflags = curr_cflags(cpu);
            } else {
                cpu->cflags_next_tb = -1;
            }

            if (check_for_breakpoints(cpu, pc, &cflags)) {
                break;
            }

            tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
            if (tb == NULL) {
                CPUJumpCache *jc;
                uint32_t h;

                mmap_lock();
                tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
                mmap_unlock();

                /*
                 * We add the TB to the virtual-pc hash table
                 * for fast lookup.
                 */
                h = tb_jmp_cache_hash_func(pc);
                jc = cpu->tb_jmp_cache;
                if (cflags & CF_PCREL) {
                    jc->array[h].pc = pc;
                    /* Ensure pc is written first. */
                    qatomic_store_release(&jc->array[h].tb, tb);
                } else {
                    /* Use the pc value already stored in tb->pc. */
                    qatomic_set(&jc->array[h].tb, tb);
                }
            }

#ifndef CONFIG_USER_ONLY
            /*
             * We don't take care of direct jumps when address mapping
             * changes in system emulation.  So it's not safe to make a
             * direct jump to a TB spanning two pages because the mapping
             * for the second page can change.
             */
            if (tb_page_addr1(tb) != -1) {
                last_tb = NULL;
            }
#endif
            /* See if we can patch the calling TB. */
            if (last_tb) {
                tb_add_jump(last_tb, tb_exit, tb);
            }

            cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit);

            /* Try to align the host and virtual clocks
               if the guest is ahead */
            align_clocks(sc, cpu);
        }
    }
    return ret;
}

static int cpu_exec_setjmp(CPUState *cpu, SyncClocks *sc)
{
    /* Prepare setjmp context for exception handling. */
    if (unlikely(sigsetjmp(cpu->jmp_env, 0) != 0)) {
        cpu_exec_longjmp_cleanup(cpu);
    }

    return cpu_exec_loop(cpu, sc);
}
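
/*
 * cpu_exec: main entry point for running guest code on a vCPU under TCG.
 * Runs the execution loop inside an RCU read-side critical section, with
 * a sigsetjmp context installed so that cpu_loop_exit() can unwind back
 * here, and returns the EXCP_* value that ended execution.
 */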
int cpu_exec(CPUState *cpu)
{
    int ret;
    SyncClocks sc = { 0 };

    /* replay_interrupt may need current_cpu */
    current_cpu = cpu;

    if (cpu_handle_halt(cpu)) {
        return EXCP_HALTED;
    }

    rcu_read_lock();
    cpu_exec_enter(cpu);

    /*
     * Calculate difference between guest clock and host clock.
     * This delay includes the delay of the last cycle, so
     * what we have to do is sleep until it is 0.  As for the
     * advance/delay we gain here, we try to fix it next time.
     */
    init_delay_params(&sc, cpu);

    ret = cpu_exec_setjmp(cpu, &sc);

    cpu_exec_exit(cpu);
    rcu_read_unlock();

    return ret;
}

bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
{
    static bool tcg_target_initialized;
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (!tcg_target_initialized) {
        cc->tcg_ops->initialize();
        tcg_target_initialized = true;
    }

    cpu->tb_jmp_cache = g_new0(CPUJumpCache, 1);
    tlb_init(cpu);
#ifndef CONFIG_USER_ONLY
    tcg_iommu_init_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */

    /* qemu_plugin_vcpu_init_hook delayed until cpu_index assigned. */

    return true;
}

/* undo the initializations in reverse order */
void tcg_exec_unrealizefn(CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
    tcg_iommu_free_notifier_list(cpu);
#endif /* !CONFIG_USER_ONLY */

    tlb_destroy(cpu);
    g_free_rcu(cpu->tb_jmp_cache, rcu);
}