kqemu.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997
  1. /*
  2. * KQEMU support
  3. *
  4. * Copyright (c) 2005-2008 Fabrice Bellard
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
  19. */
  20. #include "config.h"
  21. #ifdef _WIN32
  22. #define WIN32_LEAN_AND_MEAN
  23. #include <windows.h>
  24. #include <winioctl.h>
  25. #else
  26. #include <sys/types.h>
  27. #include <sys/mman.h>
  28. #include <sys/ioctl.h>
  29. #endif
  30. #ifdef HOST_SOLARIS
  31. #include <sys/ioccom.h>
  32. #endif
  33. #include <stdlib.h>
  34. #include <stdio.h>
  35. #include <stdarg.h>
  36. #include <string.h>
  37. #include <errno.h>
  38. #include <unistd.h>
  39. #include <inttypes.h>
  40. #include "cpu.h"
  41. #include "exec-all.h"
  42. #include "qemu-common.h"
  43. #ifdef USE_KQEMU
  44. #define DEBUG
  45. //#define PROFILE
  46. #ifdef DEBUG
  47. # define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
  48. # define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
  49. #else
  50. # define LOG_INT(...) do { } while (0)
  51. # define LOG_INT_STATE(env) do { } while (0)
  52. #endif
  53. #include <unistd.h>
  54. #include <fcntl.h>
  55. #include "kqemu.h"
  56. #ifdef _WIN32
  57. #define KQEMU_DEVICE "\\\\.\\kqemu"
  58. #else
  59. #define KQEMU_DEVICE "/dev/kqemu"
  60. #endif
  61. static void qpi_init(void);
  62. #ifdef _WIN32
  63. #define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
  64. HANDLE kqemu_fd = KQEMU_INVALID_FD;
  65. #define kqemu_closefd(x) CloseHandle(x)
  66. #else
  67. #define KQEMU_INVALID_FD -1
  68. int kqemu_fd = KQEMU_INVALID_FD;
  69. #define kqemu_closefd(x) close(x)
  70. #endif
  71. /* 0 = not allowed
  72. 1 = user kqemu
  73. 2 = kernel kqemu
  74. */
  75. int kqemu_allowed = 1;
  76. uint64_t *pages_to_flush;
  77. unsigned int nb_pages_to_flush;
  78. uint64_t *ram_pages_to_update;
  79. unsigned int nb_ram_pages_to_update;
  80. uint64_t *modified_ram_pages;
  81. unsigned int nb_modified_ram_pages;
  82. uint8_t *modified_ram_pages_table;
  83. int qpi_io_memory;
  84. uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
  85. #define cpuid(index, eax, ebx, ecx, edx) \
  86. asm volatile ("cpuid" \
  87. : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
  88. : "0" (index))
  89. #ifdef __x86_64__
  90. static int is_cpuid_supported(void)
  91. {
  92. return 1;
  93. }
  94. #else
  95. static int is_cpuid_supported(void)
  96. {
  97. int v0, v1;
  98. asm volatile ("pushf\n"
  99. "popl %0\n"
  100. "movl %0, %1\n"
  101. "xorl $0x00200000, %0\n"
  102. "pushl %0\n"
  103. "popf\n"
  104. "pushf\n"
  105. "popl %0\n"
  106. : "=a" (v0), "=d" (v1)
  107. :
  108. : "cc");
  109. return (v0 != v1);
  110. }
  111. #endif
  112. static void kqemu_update_cpuid(CPUState *env)
  113. {
  114. int critical_features_mask, features, ext_features, ext_features_mask;
  115. uint32_t eax, ebx, ecx, edx;
  116. /* the following features are kept identical on the host and
  117. target cpus because they are important for user code. Strictly
  118. speaking, only SSE really matters because the OS must support
  119. it if the user code uses it. */
  120. critical_features_mask =
  121. CPUID_CMOV | CPUID_CX8 |
  122. CPUID_FXSR | CPUID_MMX | CPUID_SSE |
  123. CPUID_SSE2 | CPUID_SEP;
  124. ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
  125. if (!is_cpuid_supported()) {
  126. features = 0;
  127. ext_features = 0;
  128. } else {
  129. cpuid(1, eax, ebx, ecx, edx);
  130. features = edx;
  131. ext_features = ecx;
  132. }
  133. #ifdef __x86_64__
  134. /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
  135. compatibility mode, so in order to have the best performances
  136. it is better not to use it */
  137. features &= ~CPUID_SEP;
  138. #endif
  139. env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
  140. (features & critical_features_mask);
  141. env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
  142. (ext_features & ext_features_mask);
  143. /* XXX: we could update more of the target CPUID state so that the
  144. non accelerated code sees exactly the same CPU features as the
  145. accelerated code */
  146. }
  147. int kqemu_init(CPUState *env)
  148. {
  149. struct kqemu_init kinit;
  150. int ret, version;
  151. #ifdef _WIN32
  152. DWORD temp;
  153. #endif
  154. if (!kqemu_allowed)
  155. return -1;
  156. #ifdef _WIN32
  157. kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
  158. FILE_SHARE_READ | FILE_SHARE_WRITE,
  159. NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
  160. NULL);
  161. if (kqemu_fd == KQEMU_INVALID_FD) {
  162. fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
  163. KQEMU_DEVICE, GetLastError());
  164. return -1;
  165. }
  166. #else
  167. kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
  168. if (kqemu_fd == KQEMU_INVALID_FD) {
  169. fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
  170. KQEMU_DEVICE, strerror(errno));
  171. return -1;
  172. }
  173. #endif
  174. version = 0;
  175. #ifdef _WIN32
  176. DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
  177. &version, sizeof(version), &temp, NULL);
  178. #else
  179. ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
  180. #endif
  181. if (version != KQEMU_VERSION) {
  182. fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
  183. version, KQEMU_VERSION);
  184. goto fail;
  185. }
  186. pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
  187. sizeof(uint64_t));
  188. if (!pages_to_flush)
  189. goto fail;
  190. ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
  191. sizeof(uint64_t));
  192. if (!ram_pages_to_update)
  193. goto fail;
  194. modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
  195. sizeof(uint64_t));
  196. if (!modified_ram_pages)
  197. goto fail;
  198. modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
  199. if (!modified_ram_pages_table)
  200. goto fail;
  201. memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
  202. kinit.ram_base = phys_ram_base;
  203. kinit.ram_size = phys_ram_size;
  204. kinit.ram_dirty = phys_ram_dirty;
  205. kinit.pages_to_flush = pages_to_flush;
  206. kinit.ram_pages_to_update = ram_pages_to_update;
  207. kinit.modified_ram_pages = modified_ram_pages;
  208. #ifdef _WIN32
  209. ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
  210. NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
  211. #else
  212. ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
  213. #endif
  214. if (ret < 0) {
  215. fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
  216. fail:
  217. kqemu_closefd(kqemu_fd);
  218. kqemu_fd = KQEMU_INVALID_FD;
  219. return -1;
  220. }
  221. kqemu_update_cpuid(env);
  222. env->kqemu_enabled = kqemu_allowed;
  223. nb_pages_to_flush = 0;
  224. nb_ram_pages_to_update = 0;
  225. qpi_init();
  226. return 0;
  227. }
  228. void kqemu_flush_page(CPUState *env, target_ulong addr)
  229. {
  230. LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
  231. if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
  232. nb_pages_to_flush = KQEMU_FLUSH_ALL;
  233. else
  234. pages_to_flush[nb_pages_to_flush++] = addr;
  235. }
  236. void kqemu_flush(CPUState *env, int global)
  237. {
  238. LOG_INT("kqemu_flush:\n");
  239. nb_pages_to_flush = KQEMU_FLUSH_ALL;
  240. }
  241. void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
  242. {
  243. LOG_INT("kqemu_set_notdirty: addr=%08lx\n",
  244. (unsigned long)ram_addr);
  245. /* we only track transitions to dirty state */
  246. if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
  247. return;
  248. if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
  249. nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
  250. else
  251. ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
  252. }
  253. static void kqemu_reset_modified_ram_pages(void)
  254. {
  255. int i;
  256. unsigned long page_index;
  257. for(i = 0; i < nb_modified_ram_pages; i++) {
  258. page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
  259. modified_ram_pages_table[page_index] = 0;
  260. }
  261. nb_modified_ram_pages = 0;
  262. }
  263. void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
  264. {
  265. unsigned long page_index;
  266. int ret;
  267. #ifdef _WIN32
  268. DWORD temp;
  269. #endif
  270. page_index = ram_addr >> TARGET_PAGE_BITS;
  271. if (!modified_ram_pages_table[page_index]) {
  272. #if 0
  273. printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
  274. #endif
  275. modified_ram_pages_table[page_index] = 1;
  276. modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
  277. if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
  278. /* flush */
  279. #ifdef _WIN32
  280. ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
  281. &nb_modified_ram_pages,
  282. sizeof(nb_modified_ram_pages),
  283. NULL, 0, &temp, NULL);
  284. #else
  285. ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
  286. &nb_modified_ram_pages);
  287. #endif
  288. kqemu_reset_modified_ram_pages();
  289. }
  290. }
  291. }
  292. void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
  293. ram_addr_t phys_offset)
  294. {
  295. struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
  296. uint64_t end;
  297. int ret, io_index;
  298. end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
  299. start_addr &= TARGET_PAGE_MASK;
  300. kphys_mem->phys_addr = start_addr;
  301. kphys_mem->size = end - start_addr;
  302. kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
  303. io_index = phys_offset & ~TARGET_PAGE_MASK;
  304. switch(io_index) {
  305. case IO_MEM_RAM:
  306. kphys_mem->io_index = KQEMU_IO_MEM_RAM;
  307. break;
  308. case IO_MEM_ROM:
  309. kphys_mem->io_index = KQEMU_IO_MEM_ROM;
  310. break;
  311. default:
  312. if (qpi_io_memory == io_index) {
  313. kphys_mem->io_index = KQEMU_IO_MEM_COMM;
  314. } else {
  315. kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
  316. }
  317. break;
  318. }
  319. #ifdef _WIN32
  320. {
  321. DWORD temp;
  322. ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
  323. kphys_mem, sizeof(*kphys_mem),
  324. NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
  325. }
  326. #else
  327. ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
  328. #endif
  329. if (ret < 0) {
  330. fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
  331. ret, start_addr,
  332. (unsigned long)size, (unsigned long)phys_offset);
  333. }
  334. }
  335. struct fpstate {
  336. uint16_t fpuc;
  337. uint16_t dummy1;
  338. uint16_t fpus;
  339. uint16_t dummy2;
  340. uint16_t fptag;
  341. uint16_t dummy3;
  342. uint32_t fpip;
  343. uint32_t fpcs;
  344. uint32_t fpoo;
  345. uint32_t fpos;
  346. uint8_t fpregs1[8 * 10];
  347. };
  348. struct fpxstate {
  349. uint16_t fpuc;
  350. uint16_t fpus;
  351. uint16_t fptag;
  352. uint16_t fop;
  353. uint32_t fpuip;
  354. uint16_t cs_sel;
  355. uint16_t dummy0;
  356. uint32_t fpudp;
  357. uint16_t ds_sel;
  358. uint16_t dummy1;
  359. uint32_t mxcsr;
  360. uint32_t mxcsr_mask;
  361. uint8_t fpregs1[8 * 16];
  362. uint8_t xmm_regs[16 * 16];
  363. uint8_t dummy2[96];
  364. };
  365. static struct fpxstate fpx1 __attribute__((aligned(16)));
  366. static void restore_native_fp_frstor(CPUState *env)
  367. {
  368. int fptag, i, j;
  369. struct fpstate fp1, *fp = &fp1;
  370. fp->fpuc = env->fpuc;
  371. fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
  372. fptag = 0;
  373. for (i=7; i>=0; i--) {
  374. fptag <<= 2;
  375. if (env->fptags[i]) {
  376. fptag |= 3;
  377. } else {
  378. /* the FPU automatically computes it */
  379. }
  380. }
  381. fp->fptag = fptag;
  382. j = env->fpstt;
  383. for(i = 0;i < 8; i++) {
  384. memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
  385. j = (j + 1) & 7;
  386. }
  387. asm volatile ("frstor %0" : "=m" (*fp));
  388. }
  389. static void save_native_fp_fsave(CPUState *env)
  390. {
  391. int fptag, i, j;
  392. uint16_t fpuc;
  393. struct fpstate fp1, *fp = &fp1;
  394. asm volatile ("fsave %0" : : "m" (*fp));
  395. env->fpuc = fp->fpuc;
  396. env->fpstt = (fp->fpus >> 11) & 7;
  397. env->fpus = fp->fpus & ~0x3800;
  398. fptag = fp->fptag;
  399. for(i = 0;i < 8; i++) {
  400. env->fptags[i] = ((fptag & 3) == 3);
  401. fptag >>= 2;
  402. }
  403. j = env->fpstt;
  404. for(i = 0;i < 8; i++) {
  405. memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
  406. j = (j + 1) & 7;
  407. }
  408. /* we must restore the default rounding state */
  409. fpuc = 0x037f | (env->fpuc & (3 << 10));
  410. asm volatile("fldcw %0" : : "m" (fpuc));
  411. }
  412. static void restore_native_fp_fxrstor(CPUState *env)
  413. {
  414. struct fpxstate *fp = &fpx1;
  415. int i, j, fptag;
  416. fp->fpuc = env->fpuc;
  417. fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
  418. fptag = 0;
  419. for(i = 0; i < 8; i++)
  420. fptag |= (env->fptags[i] << i);
  421. fp->fptag = fptag ^ 0xff;
  422. j = env->fpstt;
  423. for(i = 0;i < 8; i++) {
  424. memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
  425. j = (j + 1) & 7;
  426. }
  427. if (env->cpuid_features & CPUID_SSE) {
  428. fp->mxcsr = env->mxcsr;
  429. /* XXX: check if DAZ is not available */
  430. fp->mxcsr_mask = 0xffff;
  431. memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
  432. }
  433. asm volatile ("fxrstor %0" : "=m" (*fp));
  434. }
  435. static void save_native_fp_fxsave(CPUState *env)
  436. {
  437. struct fpxstate *fp = &fpx1;
  438. int fptag, i, j;
  439. uint16_t fpuc;
  440. asm volatile ("fxsave %0" : : "m" (*fp));
  441. env->fpuc = fp->fpuc;
  442. env->fpstt = (fp->fpus >> 11) & 7;
  443. env->fpus = fp->fpus & ~0x3800;
  444. fptag = fp->fptag ^ 0xff;
  445. for(i = 0;i < 8; i++) {
  446. env->fptags[i] = (fptag >> i) & 1;
  447. }
  448. j = env->fpstt;
  449. for(i = 0;i < 8; i++) {
  450. memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
  451. j = (j + 1) & 7;
  452. }
  453. if (env->cpuid_features & CPUID_SSE) {
  454. env->mxcsr = fp->mxcsr;
  455. memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
  456. }
  457. /* we must restore the default rounding state */
  458. asm volatile ("fninit");
  459. fpuc = 0x037f | (env->fpuc & (3 << 10));
  460. asm volatile("fldcw %0" : : "m" (fpuc));
  461. }
  462. static int do_syscall(CPUState *env,
  463. struct kqemu_cpu_state *kenv)
  464. {
  465. int selector;
  466. selector = (env->star >> 32) & 0xffff;
  467. #ifdef TARGET_X86_64
  468. if (env->hflags & HF_LMA_MASK) {
  469. int code64;
  470. env->regs[R_ECX] = kenv->next_eip;
  471. env->regs[11] = env->eflags;
  472. code64 = env->hflags & HF_CS64_MASK;
  473. cpu_x86_set_cpl(env, 0);
  474. cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
  475. 0, 0xffffffff,
  476. DESC_G_MASK | DESC_P_MASK |
  477. DESC_S_MASK |
  478. DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
  479. cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
  480. 0, 0xffffffff,
  481. DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
  482. DESC_S_MASK |
  483. DESC_W_MASK | DESC_A_MASK);
  484. env->eflags &= ~env->fmask;
  485. if (code64)
  486. env->eip = env->lstar;
  487. else
  488. env->eip = env->cstar;
  489. } else
  490. #endif
  491. {
  492. env->regs[R_ECX] = (uint32_t)kenv->next_eip;
  493. cpu_x86_set_cpl(env, 0);
  494. cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
  495. 0, 0xffffffff,
  496. DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
  497. DESC_S_MASK |
  498. DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
  499. cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
  500. 0, 0xffffffff,
  501. DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
  502. DESC_S_MASK |
  503. DESC_W_MASK | DESC_A_MASK);
  504. env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
  505. env->eip = (uint32_t)env->star;
  506. }
  507. return 2;
  508. }
  509. #ifdef CONFIG_PROFILER
  510. #define PC_REC_SIZE 1
  511. #define PC_REC_HASH_BITS 16
  512. #define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)
  513. typedef struct PCRecord {
  514. unsigned long pc;
  515. int64_t count;
  516. struct PCRecord *next;
  517. } PCRecord;
  518. static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
  519. static int nb_pc_records;
  520. static void kqemu_record_pc(unsigned long pc)
  521. {
  522. unsigned long h;
  523. PCRecord **pr, *r;
  524. h = pc / PC_REC_SIZE;
  525. h = h ^ (h >> PC_REC_HASH_BITS);
  526. h &= (PC_REC_HASH_SIZE - 1);
  527. pr = &pc_rec_hash[h];
  528. for(;;) {
  529. r = *pr;
  530. if (r == NULL)
  531. break;
  532. if (r->pc == pc) {
  533. r->count++;
  534. return;
  535. }
  536. pr = &r->next;
  537. }
  538. r = malloc(sizeof(PCRecord));
  539. r->count = 1;
  540. r->pc = pc;
  541. r->next = NULL;
  542. *pr = r;
  543. nb_pc_records++;
  544. }
  545. static int pc_rec_cmp(const void *p1, const void *p2)
  546. {
  547. PCRecord *r1 = *(PCRecord **)p1;
  548. PCRecord *r2 = *(PCRecord **)p2;
  549. if (r1->count < r2->count)
  550. return 1;
  551. else if (r1->count == r2->count)
  552. return 0;
  553. else
  554. return -1;
  555. }
  556. static void kqemu_record_flush(void)
  557. {
  558. PCRecord *r, *r_next;
  559. int h;
  560. for(h = 0; h < PC_REC_HASH_SIZE; h++) {
  561. for(r = pc_rec_hash[h]; r != NULL; r = r_next) {
  562. r_next = r->next;
  563. free(r);
  564. }
  565. pc_rec_hash[h] = NULL;
  566. }
  567. nb_pc_records = 0;
  568. }
  569. void kqemu_record_dump(void)
  570. {
  571. PCRecord **pr, *r;
  572. int i, h;
  573. FILE *f;
  574. int64_t total, sum;
  575. pr = malloc(sizeof(PCRecord *) * nb_pc_records);
  576. i = 0;
  577. total = 0;
  578. for(h = 0; h < PC_REC_HASH_SIZE; h++) {
  579. for(r = pc_rec_hash[h]; r != NULL; r = r->next) {
  580. pr[i++] = r;
  581. total += r->count;
  582. }
  583. }
  584. qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);
  585. f = fopen("/tmp/kqemu.stats", "w");
  586. if (!f) {
  587. perror("/tmp/kqemu.stats");
  588. exit(1);
  589. }
  590. fprintf(f, "total: %" PRId64 "\n", total);
  591. sum = 0;
  592. for(i = 0; i < nb_pc_records; i++) {
  593. r = pr[i];
  594. sum += r->count;
  595. fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
  596. r->pc,
  597. r->count,
  598. (double)r->count / (double)total * 100.0,
  599. (double)sum / (double)total * 100.0);
  600. }
  601. fclose(f);
  602. free(pr);
  603. kqemu_record_flush();
  604. }
  605. #endif
  606. static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
  607. const SegmentCache *sc)
  608. {
  609. ksc->selector = sc->selector;
  610. ksc->flags = sc->flags;
  611. ksc->limit = sc->limit;
  612. ksc->base = sc->base;
  613. }
  614. static inline void kqemu_save_seg(SegmentCache *sc,
  615. const struct kqemu_segment_cache *ksc)
  616. {
  617. sc->selector = ksc->selector;
  618. sc->flags = ksc->flags;
  619. sc->limit = ksc->limit;
  620. sc->base = ksc->base;
  621. }
  622. int kqemu_cpu_exec(CPUState *env)
  623. {
  624. struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
  625. int ret, cpl, i;
  626. #ifdef CONFIG_PROFILER
  627. int64_t ti;
  628. #endif
  629. #ifdef _WIN32
  630. DWORD temp;
  631. #endif
  632. #ifdef CONFIG_PROFILER
  633. ti = profile_getclock();
  634. #endif
  635. LOG_INT("kqemu: cpu_exec: enter\n");
  636. LOG_INT_STATE(env);
  637. for(i = 0; i < CPU_NB_REGS; i++)
  638. kenv->regs[i] = env->regs[i];
  639. kenv->eip = env->eip;
  640. kenv->eflags = env->eflags;
  641. for(i = 0; i < 6; i++)
  642. kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
  643. kqemu_load_seg(&kenv->ldt, &env->ldt);
  644. kqemu_load_seg(&kenv->tr, &env->tr);
  645. kqemu_load_seg(&kenv->gdt, &env->gdt);
  646. kqemu_load_seg(&kenv->idt, &env->idt);
  647. kenv->cr0 = env->cr[0];
  648. kenv->cr2 = env->cr[2];
  649. kenv->cr3 = env->cr[3];
  650. kenv->cr4 = env->cr[4];
  651. kenv->a20_mask = env->a20_mask;
  652. kenv->efer = env->efer;
  653. kenv->tsc_offset = 0;
  654. kenv->star = env->star;
  655. kenv->sysenter_cs = env->sysenter_cs;
  656. kenv->sysenter_esp = env->sysenter_esp;
  657. kenv->sysenter_eip = env->sysenter_eip;
  658. #ifdef TARGET_X86_64
  659. kenv->lstar = env->lstar;
  660. kenv->cstar = env->cstar;
  661. kenv->fmask = env->fmask;
  662. kenv->kernelgsbase = env->kernelgsbase;
  663. #endif
  664. if (env->dr[7] & 0xff) {
  665. kenv->dr7 = env->dr[7];
  666. kenv->dr0 = env->dr[0];
  667. kenv->dr1 = env->dr[1];
  668. kenv->dr2 = env->dr[2];
  669. kenv->dr3 = env->dr[3];
  670. } else {
  671. kenv->dr7 = 0;
  672. }
  673. kenv->dr6 = env->dr[6];
  674. cpl = (env->hflags & HF_CPL_MASK);
  675. kenv->cpl = cpl;
  676. kenv->nb_pages_to_flush = nb_pages_to_flush;
  677. kenv->user_only = (env->kqemu_enabled == 1);
  678. kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
  679. nb_ram_pages_to_update = 0;
  680. kenv->nb_modified_ram_pages = nb_modified_ram_pages;
  681. kqemu_reset_modified_ram_pages();
  682. if (env->cpuid_features & CPUID_FXSR)
  683. restore_native_fp_fxrstor(env);
  684. else
  685. restore_native_fp_frstor(env);
  686. #ifdef _WIN32
  687. if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
  688. kenv, sizeof(struct kqemu_cpu_state),
  689. kenv, sizeof(struct kqemu_cpu_state),
  690. &temp, NULL)) {
  691. ret = kenv->retval;
  692. } else {
  693. ret = -1;
  694. }
  695. #else
  696. ioctl(kqemu_fd, KQEMU_EXEC, kenv);
  697. ret = kenv->retval;
  698. #endif
  699. if (env->cpuid_features & CPUID_FXSR)
  700. save_native_fp_fxsave(env);
  701. else
  702. save_native_fp_fsave(env);
  703. for(i = 0; i < CPU_NB_REGS; i++)
  704. env->regs[i] = kenv->regs[i];
  705. env->eip = kenv->eip;
  706. env->eflags = kenv->eflags;
  707. for(i = 0; i < 6; i++)
  708. kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
  709. cpu_x86_set_cpl(env, kenv->cpl);
  710. kqemu_save_seg(&env->ldt, &kenv->ldt);
  711. env->cr[0] = kenv->cr0;
  712. env->cr[4] = kenv->cr4;
  713. env->cr[3] = kenv->cr3;
  714. env->cr[2] = kenv->cr2;
  715. env->dr[6] = kenv->dr6;
  716. #ifdef TARGET_X86_64
  717. env->kernelgsbase = kenv->kernelgsbase;
  718. #endif
  719. /* flush pages as indicated by kqemu */
  720. if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
  721. tlb_flush(env, 1);
  722. } else {
  723. for(i = 0; i < kenv->nb_pages_to_flush; i++) {
  724. tlb_flush_page(env, pages_to_flush[i]);
  725. }
  726. }
  727. nb_pages_to_flush = 0;
  728. #ifdef CONFIG_PROFILER
  729. kqemu_time += profile_getclock() - ti;
  730. kqemu_exec_count++;
  731. #endif
  732. if (kenv->nb_ram_pages_to_update > 0) {
  733. cpu_tlb_update_dirty(env);
  734. }
  735. if (kenv->nb_modified_ram_pages > 0) {
  736. for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
  737. unsigned long addr;
  738. addr = modified_ram_pages[i];
  739. tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
  740. }
  741. }
  742. /* restore the hidden flags */
  743. {
  744. unsigned int new_hflags;
  745. #ifdef TARGET_X86_64
  746. if ((env->hflags & HF_LMA_MASK) &&
  747. (env->segs[R_CS].flags & DESC_L_MASK)) {
  748. /* long mode */
  749. new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
  750. } else
  751. #endif
  752. {
  753. /* legacy / compatibility case */
  754. new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
  755. >> (DESC_B_SHIFT - HF_CS32_SHIFT);
  756. new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
  757. >> (DESC_B_SHIFT - HF_SS32_SHIFT);
  758. if (!(env->cr[0] & CR0_PE_MASK) ||
  759. (env->eflags & VM_MASK) ||
  760. !(env->hflags & HF_CS32_MASK)) {
  761. /* XXX: try to avoid this test. The problem comes from the
  762. fact that is real mode or vm86 mode we only modify the
  763. 'base' and 'selector' fields of the segment cache to go
  764. faster. A solution may be to force addseg to one in
  765. translate-i386.c. */
  766. new_hflags |= HF_ADDSEG_MASK;
  767. } else {
  768. new_hflags |= ((env->segs[R_DS].base |
  769. env->segs[R_ES].base |
  770. env->segs[R_SS].base) != 0) <<
  771. HF_ADDSEG_SHIFT;
  772. }
  773. }
  774. env->hflags = (env->hflags &
  775. ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
  776. new_hflags;
  777. }
  778. /* update FPU flags */
  779. env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
  780. ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
  781. if (env->cr[4] & CR4_OSFXSR_MASK)
  782. env->hflags |= HF_OSFXSR_MASK;
  783. else
  784. env->hflags &= ~HF_OSFXSR_MASK;
  785. LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
  786. if (ret == KQEMU_RET_SYSCALL) {
  787. /* syscall instruction */
  788. return do_syscall(env, kenv);
  789. } else
  790. if ((ret & 0xff00) == KQEMU_RET_INT) {
  791. env->exception_index = ret & 0xff;
  792. env->error_code = 0;
  793. env->exception_is_int = 1;
  794. env->exception_next_eip = kenv->next_eip;
  795. #ifdef CONFIG_PROFILER
  796. kqemu_ret_int_count++;
  797. #endif
  798. LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
  799. LOG_INT_STATE(env);
  800. return 1;
  801. } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
  802. env->exception_index = ret & 0xff;
  803. env->error_code = kenv->error_code;
  804. env->exception_is_int = 0;
  805. env->exception_next_eip = 0;
  806. #ifdef CONFIG_PROFILER
  807. kqemu_ret_excp_count++;
  808. #endif
  809. LOG_INT("kqemu: exception v=%02x e=%04x:\n",
  810. env->exception_index, env->error_code);
  811. LOG_INT_STATE(env);
  812. return 1;
  813. } else if (ret == KQEMU_RET_INTR) {
  814. #ifdef CONFIG_PROFILER
  815. kqemu_ret_intr_count++;
  816. #endif
  817. LOG_INT_STATE(env);
  818. return 0;
  819. } else if (ret == KQEMU_RET_SOFTMMU) {
  820. #ifdef CONFIG_PROFILER
  821. {
  822. unsigned long pc = env->eip + env->segs[R_CS].base;
  823. kqemu_record_pc(pc);
  824. }
  825. #endif
  826. LOG_INT_STATE(env);
  827. return 2;
  828. } else {
  829. cpu_dump_state(env, stderr, fprintf, 0);
  830. fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
  831. exit(1);
  832. }
  833. return 0;
  834. }
  835. void kqemu_cpu_interrupt(CPUState *env)
  836. {
  837. #if defined(_WIN32)
  838. /* cancelling the I/O request causes KQEMU to finish executing the
  839. current block and successfully returning. */
  840. CancelIo(kqemu_fd);
  841. #endif
  842. }
/*
   QEMU paravirtualization interface. The current interface only
   allows modifying the IF and IOPL flags when running in
   kqemu.

   At this point it is not very satisfactory. I leave it for reference
   as it adds little complexity.
*/
  850. #define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
  851. static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
  852. {
  853. return 0;
  854. }
  855. static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
  856. {
  857. return 0;
  858. }
  859. static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
  860. {
  861. }
  862. static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
  863. {
  864. }
  865. static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
  866. {
  867. CPUState *env;
  868. env = cpu_single_env;
  869. if (!env)
  870. return 0;
  871. return env->eflags & (IF_MASK | IOPL_MASK);
  872. }
  873. /* Note: after writing to this address, the guest code must make sure
  874. it is exiting the current TB. pushf/popf can be used for that
  875. purpose. */
  876. static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
  877. {
  878. CPUState *env;
  879. env = cpu_single_env;
  880. if (!env)
  881. return;
  882. env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
  883. (val & (IF_MASK | IOPL_MASK));
  884. }
  885. static CPUReadMemoryFunc *qpi_mem_read[3] = {
  886. qpi_mem_readb,
  887. qpi_mem_readw,
  888. qpi_mem_readl,
  889. };
  890. static CPUWriteMemoryFunc *qpi_mem_write[3] = {
  891. qpi_mem_writeb,
  892. qpi_mem_writew,
  893. qpi_mem_writel,
  894. };
  895. static void qpi_init(void)
  896. {
  897. kqemu_comm_base = 0xff000000 | 1;
  898. qpi_io_memory = cpu_register_io_memory(0,
  899. qpi_mem_read,
  900. qpi_mem_write, NULL);
  901. cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
  902. 0x1000, qpi_io_memory);
  903. }
  904. #endif