vapic.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870
  1. /*
  2. * TPR optimization for 32-bit Windows guests (XP and Server 2003)
  3. *
  4. * Copyright (C) 2007-2008 Qumranet Technologies
  5. * Copyright (C) 2012 Jan Kiszka, Siemens AG
  6. *
  7. * This work is licensed under the terms of the GNU GPL version 2, or
  8. * (at your option) any later version. See the COPYING file in the
  9. * top-level directory.
  10. */
  11. #include "qemu/osdep.h"
  12. #include "qemu/module.h"
  13. #include "system/system.h"
  14. #include "system/cpus.h"
  15. #include "system/hw_accel.h"
  16. #include "system/kvm.h"
  17. #include "system/runstate.h"
  18. #include "exec/address-spaces.h"
  19. #include "hw/i386/apic_internal.h"
  20. #include "hw/sysbus.h"
  21. #include "hw/boards.h"
  22. #include "migration/vmstate.h"
  23. #include "qom/object.h"
  /* I/O port through which the option ROM talks to this device (vapic_write). */
  #define VAPIC_IO_PORT       0x7e

  /* Shift applied to a CPU number to locate its slot inside the VAPIC area. */
  #define VAPIC_CPU_SHIFT     7

  /* The ROM size byte is expressed in units of ROM_BLOCK_SIZE bytes. */
  #define ROM_BLOCK_SIZE      512
  /* Clears the offset-within-block bits of an address. */
  #define ROM_BLOCK_MASK      (~(ROM_BLOCK_SIZE - 1))
  /*
   * Activation state of the TPR optimization.  The numeric values are stored
   * in VAPICROMState.state, which is part of the migration stream, so they
   * are spelled out explicitly.
   */
  typedef enum VAPICMode {
      VAPIC_INACTIVE = 0,    /* option ROM has not announced itself */
      VAPIC_ACTIVE   = 1,    /* set by vapic_enable()/do_vapic_enable() */
      VAPIC_STANDBY  = 2,    /* ROM announced itself via the 16-bit port write */
  } VAPICMode;
  33. typedef struct VAPICHandlers {
  34. uint32_t set_tpr;
  35. uint32_t set_tpr_eax;
  36. uint32_t get_tpr[8];
  37. uint32_t get_tpr_stack;
  38. } QEMU_PACKED VAPICHandlers;
  39. typedef struct GuestROMState {
  40. char signature[8];
  41. uint32_t vaddr;
  42. uint32_t fixup_start;
  43. uint32_t fixup_end;
  44. uint32_t vapic_vaddr;
  45. uint32_t vapic_size;
  46. uint32_t vcpu_shift;
  47. uint32_t real_tpr_addr;
  48. VAPICHandlers up;
  49. VAPICHandlers mp;
  50. } QEMU_PACKED GuestROMState;
  51. struct VAPICROMState {
  52. SysBusDevice busdev;
  53. MemoryRegion io;
  54. MemoryRegion rom;
  55. uint32_t state;
  56. uint32_t rom_state_paddr;
  57. uint32_t rom_state_vaddr;
  58. uint32_t vapic_paddr;
  59. uint32_t real_tpr_addr;
  60. GuestROMState rom_state;
  61. size_t rom_size;
  62. bool rom_mapped_writable;
  63. VMChangeStateEntry *vmsentry;
  64. };
  65. #define TYPE_VAPIC "kvmvapic"
  66. OBJECT_DECLARE_SIMPLE_TYPE(VAPICROMState, VAPIC)
  67. #define TPR_INSTR_ABS_MODRM 0x1
  68. #define TPR_INSTR_MATCH_MODRM_REG 0x2
  69. typedef struct TPRInstruction {
  70. uint8_t opcode;
  71. uint8_t modrm_reg;
  72. unsigned int flags;
  73. TPRAccess access;
  74. size_t length;
  75. off_t addr_offset;
  76. } TPRInstruction;
  77. /* must be sorted by length, shortest first */
  78. static const TPRInstruction tpr_instr[] = {
  79. { /* mov abs to eax */
  80. .opcode = 0xa1,
  81. .access = TPR_ACCESS_READ,
  82. .length = 5,
  83. .addr_offset = 1,
  84. },
  85. { /* mov eax to abs */
  86. .opcode = 0xa3,
  87. .access = TPR_ACCESS_WRITE,
  88. .length = 5,
  89. .addr_offset = 1,
  90. },
  91. { /* mov r32 to r/m32 */
  92. .opcode = 0x89,
  93. .flags = TPR_INSTR_ABS_MODRM,
  94. .access = TPR_ACCESS_WRITE,
  95. .length = 6,
  96. .addr_offset = 2,
  97. },
  98. { /* mov r/m32 to r32 */
  99. .opcode = 0x8b,
  100. .flags = TPR_INSTR_ABS_MODRM,
  101. .access = TPR_ACCESS_READ,
  102. .length = 6,
  103. .addr_offset = 2,
  104. },
  105. { /* push r/m32 */
  106. .opcode = 0xff,
  107. .modrm_reg = 6,
  108. .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
  109. .access = TPR_ACCESS_READ,
  110. .length = 6,
  111. .addr_offset = 2,
  112. },
  113. { /* mov imm32, r/m32 (c7/0) */
  114. .opcode = 0xc7,
  115. .modrm_reg = 0,
  116. .flags = TPR_INSTR_ABS_MODRM | TPR_INSTR_MATCH_MODRM_REG,
  117. .access = TPR_ACCESS_WRITE,
  118. .length = 10,
  119. .addr_offset = 2,
  120. },
  121. };
  122. static void read_guest_rom_state(VAPICROMState *s)
  123. {
  124. cpu_physical_memory_read(s->rom_state_paddr, &s->rom_state,
  125. sizeof(GuestROMState));
  126. }
  127. static void write_guest_rom_state(VAPICROMState *s)
  128. {
  129. cpu_physical_memory_write(s->rom_state_paddr, &s->rom_state,
  130. sizeof(GuestROMState));
  131. }
  132. static void update_guest_rom_state(VAPICROMState *s)
  133. {
  134. read_guest_rom_state(s);
  135. s->rom_state.real_tpr_addr = cpu_to_le32(s->real_tpr_addr);
  136. s->rom_state.vcpu_shift = cpu_to_le32(VAPIC_CPU_SHIFT);
  137. write_guest_rom_state(s);
  138. }
  139. static int find_real_tpr_addr(VAPICROMState *s, CPUX86State *env)
  140. {
  141. CPUState *cs = env_cpu(env);
  142. hwaddr paddr;
  143. target_ulong addr;
  144. if (s->state == VAPIC_ACTIVE) {
  145. return 0;
  146. }
  147. /*
  148. * If there is no prior TPR access instruction we could analyze (which is
  149. * the case after resume from hibernation), we need to scan the possible
  150. * virtual address space for the APIC mapping.
  151. */
  152. for (addr = 0xfffff000; addr >= 0x80000000; addr -= TARGET_PAGE_SIZE) {
  153. paddr = cpu_get_phys_page_debug(cs, addr);
  154. if (paddr != APIC_DEFAULT_ADDRESS) {
  155. continue;
  156. }
  157. s->real_tpr_addr = addr + 0x80;
  158. update_guest_rom_state(s);
  159. return 0;
  160. }
  161. return -1;
  162. }
  /* Extract the 3-bit reg field (bits 5..3) of a ModRM byte. */
  static uint8_t modrm_reg(uint8_t modrm)
  {
      return (modrm & 0x38) >> 3;
  }
  /*
   * True if the ModRM byte has mod == 0 and r/m == 5, i.e. the operand is a
   * 32-bit absolute address; the reg field is ignored.
   */
  static bool is_abs_modrm(uint8_t modrm)
  {
      const uint8_t mod = modrm >> 6;
      const uint8_t rm = modrm & 0x07;

      return mod == 0 && rm == 0x05;
  }
  171. static bool opcode_matches(uint8_t *opcode, const TPRInstruction *instr)
  172. {
  173. return opcode[0] == instr->opcode &&
  174. (!(instr->flags & TPR_INSTR_ABS_MODRM) || is_abs_modrm(opcode[1])) &&
  175. (!(instr->flags & TPR_INSTR_MATCH_MODRM_REG) ||
  176. modrm_reg(opcode[1]) == instr->modrm_reg);
  177. }
  178. static int evaluate_tpr_instruction(VAPICROMState *s, X86CPU *cpu,
  179. target_ulong *pip, TPRAccess access)
  180. {
  181. CPUState *cs = CPU(cpu);
  182. const TPRInstruction *instr;
  183. target_ulong ip = *pip;
  184. uint8_t opcode[2];
  185. uint32_t real_tpr_addr;
  186. int i;
  187. if ((ip & 0xf0000000ULL) != 0x80000000ULL &&
  188. (ip & 0xf0000000ULL) != 0xe0000000ULL) {
  189. return -1;
  190. }
  191. /*
  192. * Early Windows 2003 SMP initialization contains a
  193. *
  194. * mov imm32, r/m32
  195. *
  196. * instruction that is patched by TPR optimization. The problem is that
  197. * RSP, used by the patched instruction, is zero, so the guest gets a
  198. * double fault and dies.
  199. */
  200. if (cpu->env.regs[R_ESP] == 0) {
  201. return -1;
  202. }
  203. if (kvm_enabled() && !kvm_irqchip_in_kernel()) {
  204. /*
  205. * KVM without kernel-based TPR access reporting will pass an IP that
  206. * points after the accessing instruction. So we need to look backward
  207. * to find the reason.
  208. */
  209. for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
  210. instr = &tpr_instr[i];
  211. if (instr->access != access) {
  212. continue;
  213. }
  214. if (cpu_memory_rw_debug(cs, ip - instr->length, opcode,
  215. sizeof(opcode), 0) < 0) {
  216. return -1;
  217. }
  218. if (opcode_matches(opcode, instr)) {
  219. ip -= instr->length;
  220. goto instruction_ok;
  221. }
  222. }
  223. return -1;
  224. } else {
  225. if (cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0) < 0) {
  226. return -1;
  227. }
  228. for (i = 0; i < ARRAY_SIZE(tpr_instr); i++) {
  229. instr = &tpr_instr[i];
  230. if (opcode_matches(opcode, instr)) {
  231. goto instruction_ok;
  232. }
  233. }
  234. return -1;
  235. }
  236. instruction_ok:
  237. /*
  238. * Grab the virtual TPR address from the instruction
  239. * and update the cached values.
  240. */
  241. if (cpu_memory_rw_debug(cs, ip + instr->addr_offset,
  242. (void *)&real_tpr_addr,
  243. sizeof(real_tpr_addr), 0) < 0) {
  244. return -1;
  245. }
  246. real_tpr_addr = le32_to_cpu(real_tpr_addr);
  247. if ((real_tpr_addr & 0xfff) != 0x80) {
  248. return -1;
  249. }
  250. s->real_tpr_addr = real_tpr_addr;
  251. update_guest_rom_state(s);
  252. *pip = ip;
  253. return 0;
  254. }
  255. static int update_rom_mapping(VAPICROMState *s, CPUX86State *env, target_ulong ip)
  256. {
  257. CPUState *cs = env_cpu(env);
  258. hwaddr paddr;
  259. uint32_t rom_state_vaddr;
  260. uint32_t pos, patch, offset;
  261. /* nothing to do if already activated */
  262. if (s->state == VAPIC_ACTIVE) {
  263. return 0;
  264. }
  265. /* bail out if ROM init code was not executed (missing ROM?) */
  266. if (s->state == VAPIC_INACTIVE) {
  267. return -1;
  268. }
  269. /* find out virtual address of the ROM */
  270. rom_state_vaddr = s->rom_state_paddr + (ip & 0xf0000000);
  271. paddr = cpu_get_phys_page_debug(cs, rom_state_vaddr);
  272. if (paddr == -1) {
  273. return -1;
  274. }
  275. paddr += rom_state_vaddr & ~TARGET_PAGE_MASK;
  276. if (paddr != s->rom_state_paddr) {
  277. return -1;
  278. }
  279. read_guest_rom_state(s);
  280. if (memcmp(s->rom_state.signature, "kvm aPiC", 8) != 0) {
  281. return -1;
  282. }
  283. s->rom_state_vaddr = rom_state_vaddr;
  284. /* fixup addresses in ROM if needed */
  285. if (rom_state_vaddr == le32_to_cpu(s->rom_state.vaddr)) {
  286. return 0;
  287. }
  288. for (pos = le32_to_cpu(s->rom_state.fixup_start);
  289. pos < le32_to_cpu(s->rom_state.fixup_end);
  290. pos += 4) {
  291. cpu_physical_memory_read(paddr + pos - s->rom_state.vaddr,
  292. &offset, sizeof(offset));
  293. offset = le32_to_cpu(offset);
  294. cpu_physical_memory_read(paddr + offset, &patch, sizeof(patch));
  295. patch = le32_to_cpu(patch);
  296. patch += rom_state_vaddr - le32_to_cpu(s->rom_state.vaddr);
  297. patch = cpu_to_le32(patch);
  298. cpu_physical_memory_write(paddr + offset, &patch, sizeof(patch));
  299. }
  300. read_guest_rom_state(s);
  301. s->vapic_paddr = paddr + le32_to_cpu(s->rom_state.vapic_vaddr) -
  302. le32_to_cpu(s->rom_state.vaddr);
  303. return 0;
  304. }
  305. /*
  306. * Tries to read the unique processor number from the Kernel Processor Control
  307. * Region (KPCR) of 32-bit Windows XP and Server 2003. Returns -1 if the KPCR
  308. * cannot be accessed or is considered invalid. This also ensures that we are
  309. * not patching the wrong guest.
  310. */
  311. static int get_kpcr_number(X86CPU *cpu)
  312. {
  313. CPUX86State *env = &cpu->env;
  314. struct kpcr {
  315. uint8_t fill1[0x1c];
  316. uint32_t self;
  317. uint8_t fill2[0x31];
  318. uint8_t number;
  319. } QEMU_PACKED kpcr;
  320. if (cpu_memory_rw_debug(CPU(cpu), env->segs[R_FS].base,
  321. (void *)&kpcr, sizeof(kpcr), 0) < 0 ||
  322. kpcr.self != env->segs[R_FS].base) {
  323. return -1;
  324. }
  325. return kpcr.number;
  326. }
  327. static int vapic_enable(VAPICROMState *s, X86CPU *cpu)
  328. {
  329. int cpu_number = get_kpcr_number(cpu);
  330. hwaddr vapic_paddr;
  331. static const uint8_t enabled = 1;
  332. if (cpu_number < 0) {
  333. return -1;
  334. }
  335. vapic_paddr = s->vapic_paddr +
  336. (((hwaddr)cpu_number) << VAPIC_CPU_SHIFT);
  337. cpu_physical_memory_write(vapic_paddr + offsetof(VAPICState, enabled),
  338. &enabled, sizeof(enabled));
  339. apic_enable_vapic(cpu->apic_state, vapic_paddr);
  340. s->state = VAPIC_ACTIVE;
  341. return 0;
  342. }
  343. static void patch_byte(X86CPU *cpu, target_ulong addr, uint8_t byte)
  344. {
  345. cpu_memory_rw_debug(CPU(cpu), addr, &byte, 1, 1);
  346. }
  347. static void patch_call(X86CPU *cpu, target_ulong ip, uint32_t target)
  348. {
  349. uint32_t offset;
  350. offset = cpu_to_le32(target - ip - 5);
  351. patch_byte(cpu, ip, 0xe8); /* call near */
  352. cpu_memory_rw_debug(CPU(cpu), ip + 1, (void *)&offset, sizeof(offset), 1);
  353. }
  354. typedef struct PatchInfo {
  355. VAPICHandlers *handler;
  356. target_ulong ip;
  357. } PatchInfo;
  358. static void do_patch_instruction(CPUState *cs, run_on_cpu_data data)
  359. {
  360. X86CPU *x86_cpu = X86_CPU(cs);
  361. PatchInfo *info = (PatchInfo *) data.host_ptr;
  362. VAPICHandlers *handlers = info->handler;
  363. target_ulong ip = info->ip;
  364. uint8_t opcode[2];
  365. uint32_t imm32 = 0;
  366. cpu_memory_rw_debug(cs, ip, opcode, sizeof(opcode), 0);
  367. switch (opcode[0]) {
  368. case 0x89: /* mov r32 to r/m32 */
  369. patch_byte(x86_cpu, ip, 0x50 + modrm_reg(opcode[1])); /* push reg */
  370. patch_call(x86_cpu, ip + 1, handlers->set_tpr);
  371. break;
  372. case 0x8b: /* mov r/m32 to r32 */
  373. patch_byte(x86_cpu, ip, 0x90);
  374. patch_call(x86_cpu, ip + 1, handlers->get_tpr[modrm_reg(opcode[1])]);
  375. break;
  376. case 0xa1: /* mov abs to eax */
  377. patch_call(x86_cpu, ip, handlers->get_tpr[0]);
  378. break;
  379. case 0xa3: /* mov eax to abs */
  380. patch_call(x86_cpu, ip, handlers->set_tpr_eax);
  381. break;
  382. case 0xc7: /* mov imm32, r/m32 (c7/0) */
  383. patch_byte(x86_cpu, ip, 0x68); /* push imm32 */
  384. cpu_memory_rw_debug(cs, ip + 6, (void *)&imm32, sizeof(imm32), 0);
  385. cpu_memory_rw_debug(cs, ip + 1, (void *)&imm32, sizeof(imm32), 1);
  386. patch_call(x86_cpu, ip + 5, handlers->set_tpr);
  387. break;
  388. case 0xff: /* push r/m32 */
  389. patch_byte(x86_cpu, ip, 0x50); /* push eax */
  390. patch_call(x86_cpu, ip + 1, handlers->get_tpr_stack);
  391. break;
  392. default:
  393. abort();
  394. }
  395. g_free(info);
  396. }
  397. static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
  398. {
  399. MachineState *ms = MACHINE(qdev_get_machine());
  400. CPUState *cs = CPU(cpu);
  401. VAPICHandlers *handlers;
  402. PatchInfo *info;
  403. if (ms->smp.cpus == 1) {
  404. handlers = &s->rom_state.up;
  405. } else {
  406. handlers = &s->rom_state.mp;
  407. }
  408. info = g_new(PatchInfo, 1);
  409. info->handler = handlers;
  410. info->ip = ip;
  411. async_safe_run_on_cpu(cs, do_patch_instruction, RUN_ON_CPU_HOST_PTR(info));
  412. }
  413. void vapic_report_tpr_access(DeviceState *dev, CPUState *cs, target_ulong ip,
  414. TPRAccess access)
  415. {
  416. VAPICROMState *s = VAPIC(dev);
  417. X86CPU *cpu = X86_CPU(cs);
  418. CPUX86State *env = &cpu->env;
  419. cpu_synchronize_state(cs);
  420. if (evaluate_tpr_instruction(s, cpu, &ip, access) < 0) {
  421. if (s->state == VAPIC_ACTIVE) {
  422. vapic_enable(s, cpu);
  423. }
  424. return;
  425. }
  426. if (update_rom_mapping(s, env, ip) < 0) {
  427. return;
  428. }
  429. if (vapic_enable(s, cpu) < 0) {
  430. return;
  431. }
  432. patch_instruction(s, cpu, ip);
  433. }
  434. typedef struct VAPICEnableTPRReporting {
  435. DeviceState *apic;
  436. bool enable;
  437. } VAPICEnableTPRReporting;
  438. static void vapic_do_enable_tpr_reporting(CPUState *cpu, run_on_cpu_data data)
  439. {
  440. VAPICEnableTPRReporting *info = data.host_ptr;
  441. apic_enable_tpr_access_reporting(info->apic, info->enable);
  442. }
  443. static void vapic_enable_tpr_reporting(bool enable)
  444. {
  445. VAPICEnableTPRReporting info = {
  446. .enable = enable,
  447. };
  448. CPUState *cs;
  449. X86CPU *cpu;
  450. CPU_FOREACH(cs) {
  451. cpu = X86_CPU(cs);
  452. info.apic = cpu->apic_state;
  453. run_on_cpu(cs, vapic_do_enable_tpr_reporting, RUN_ON_CPU_HOST_PTR(&info));
  454. }
  455. }
  456. static void vapic_reset(DeviceState *dev)
  457. {
  458. VAPICROMState *s = VAPIC(dev);
  459. s->state = VAPIC_INACTIVE;
  460. s->rom_state_paddr = 0;
  461. vapic_enable_tpr_reporting(false);
  462. }
  463. /*
  464. * Set the IRQ polling hypercalls to the supported variant:
  465. * - vmcall if using KVM in-kernel irqchip
  466. * - 32-bit VAPIC port write otherwise
  467. */
  468. static int patch_hypercalls(VAPICROMState *s)
  469. {
  470. hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
  471. static const uint8_t vmcall_pattern[] = { /* vmcall */
  472. 0xb8, 0x1, 0, 0, 0, 0xf, 0x1, 0xc1
  473. };
  474. static const uint8_t outl_pattern[] = { /* nop; outl %eax,0x7e */
  475. 0xb8, 0x1, 0, 0, 0, 0x90, 0xe7, 0x7e
  476. };
  477. uint8_t alternates[2];
  478. const uint8_t *pattern;
  479. const uint8_t *patch;
  480. off_t pos;
  481. uint8_t *rom;
  482. rom = g_malloc(s->rom_size);
  483. cpu_physical_memory_read(rom_paddr, rom, s->rom_size);
  484. for (pos = 0; pos < s->rom_size - sizeof(vmcall_pattern); pos++) {
  485. if (kvm_irqchip_in_kernel()) {
  486. pattern = outl_pattern;
  487. alternates[0] = outl_pattern[7];
  488. alternates[1] = outl_pattern[7];
  489. patch = &vmcall_pattern[5];
  490. } else {
  491. pattern = vmcall_pattern;
  492. alternates[0] = vmcall_pattern[7];
  493. alternates[1] = 0xd9; /* AMD's VMMCALL */
  494. patch = &outl_pattern[5];
  495. }
  496. if (memcmp(rom + pos, pattern, 7) == 0 &&
  497. (rom[pos + 7] == alternates[0] || rom[pos + 7] == alternates[1])) {
  498. cpu_physical_memory_write(rom_paddr + pos + 5, patch, 3);
  499. /*
  500. * Don't flush the tb here. Under ordinary conditions, the patched
  501. * calls are miles away from the current IP. Under malicious
  502. * conditions, the guest could trick us to crash.
  503. */
  504. }
  505. }
  506. g_free(rom);
  507. return 0;
  508. }
  509. /*
  510. * For TCG mode or the time KVM honors read-only memory regions, we need to
  511. * enable write access to the option ROM so that variables can be updated by
  512. * the guest.
  513. */
  514. static int vapic_map_rom_writable(VAPICROMState *s)
  515. {
  516. hwaddr rom_paddr = s->rom_state_paddr & ROM_BLOCK_MASK;
  517. MemoryRegionSection section;
  518. MemoryRegion *mr = get_system_memory();
  519. size_t rom_size;
  520. uint8_t *ram;
  521. if (s->rom_mapped_writable) {
  522. memory_region_del_subregion(mr, &s->rom);
  523. object_unparent(OBJECT(&s->rom));
  524. }
  525. /* grab RAM memory region (region @rom_paddr may still be pc.rom) */
  526. section = memory_region_find(mr, 0, 1);
  527. /* read ROM size from RAM region */
  528. if (rom_paddr + 2 >= memory_region_size(section.mr)) {
  529. return -1;
  530. }
  531. ram = memory_region_get_ram_ptr(section.mr);
  532. rom_size = ram[rom_paddr + 2] * ROM_BLOCK_SIZE;
  533. if (rom_size == 0) {
  534. return -1;
  535. }
  536. s->rom_size = rom_size;
  537. /* We need to round to avoid creating subpages
  538. * from which we cannot run code. */
  539. rom_size += rom_paddr & ~TARGET_PAGE_MASK;
  540. rom_paddr &= TARGET_PAGE_MASK;
  541. rom_size = TARGET_PAGE_ALIGN(rom_size);
  542. memory_region_init_alias(&s->rom, OBJECT(s), "kvmvapic-rom", section.mr,
  543. rom_paddr, rom_size);
  544. memory_region_add_subregion_overlap(mr, rom_paddr, &s->rom, 1000);
  545. s->rom_mapped_writable = true;
  546. memory_region_unref(section.mr);
  547. return 0;
  548. }
  549. static int vapic_prepare(VAPICROMState *s)
  550. {
  551. if (vapic_map_rom_writable(s) < 0) {
  552. return -1;
  553. }
  554. if (patch_hypercalls(s) < 0) {
  555. return -1;
  556. }
  557. vapic_enable_tpr_reporting(true);
  558. return 0;
  559. }
  560. static void vapic_write(void *opaque, hwaddr addr, uint64_t data,
  561. unsigned int size)
  562. {
  563. VAPICROMState *s = opaque;
  564. X86CPU *cpu;
  565. CPUX86State *env;
  566. hwaddr rom_paddr;
  567. if (!current_cpu) {
  568. return;
  569. }
  570. cpu_synchronize_state(current_cpu);
  571. cpu = X86_CPU(current_cpu);
  572. env = &cpu->env;
  573. /*
  574. * The VAPIC supports two PIO-based hypercalls, both via port 0x7E.
  575. * o 16-bit write access:
  576. * Reports the option ROM initialization to the hypervisor. Written
  577. * value is the offset of the state structure in the ROM.
  578. * o 8-bit write access:
  579. * Reactivates the VAPIC after a guest hibernation, i.e. after the
  580. * option ROM content has been re-initialized by a guest power cycle.
  581. * o 32-bit write access:
  582. * Poll for pending IRQs, considering the current VAPIC state.
  583. */
  584. switch (size) {
  585. case 2:
  586. if (s->state == VAPIC_INACTIVE) {
  587. rom_paddr = (env->segs[R_CS].base + env->eip) & ROM_BLOCK_MASK;
  588. s->rom_state_paddr = rom_paddr + data;
  589. s->state = VAPIC_STANDBY;
  590. }
  591. if (vapic_prepare(s) < 0) {
  592. s->state = VAPIC_INACTIVE;
  593. s->rom_state_paddr = 0;
  594. break;
  595. }
  596. break;
  597. case 1:
  598. if (kvm_enabled()) {
  599. /*
  600. * Disable triggering instruction in ROM by writing a NOP.
  601. *
  602. * We cannot do this in TCG mode as the reported IP is not
  603. * accurate.
  604. */
  605. pause_all_vcpus();
  606. patch_byte(cpu, env->eip - 2, 0x66);
  607. patch_byte(cpu, env->eip - 1, 0x90);
  608. resume_all_vcpus();
  609. }
  610. if (s->state == VAPIC_ACTIVE) {
  611. break;
  612. }
  613. if (update_rom_mapping(s, env, env->eip) < 0) {
  614. break;
  615. }
  616. if (find_real_tpr_addr(s, env) < 0) {
  617. break;
  618. }
  619. vapic_enable(s, cpu);
  620. break;
  621. default:
  622. case 4:
  623. if (!kvm_irqchip_in_kernel()) {
  624. apic_poll_irq(cpu->apic_state);
  625. }
  626. break;
  627. }
  628. }
  629. static uint64_t vapic_read(void *opaque, hwaddr addr, unsigned size)
  630. {
  631. return 0xffffffff;
  632. }
  633. static const MemoryRegionOps vapic_ops = {
  634. .write = vapic_write,
  635. .read = vapic_read,
  636. .endianness = DEVICE_LITTLE_ENDIAN,
  637. };
  638. static void vapic_realize(DeviceState *dev, Error **errp)
  639. {
  640. SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
  641. VAPICROMState *s = VAPIC(dev);
  642. memory_region_init_io(&s->io, OBJECT(s), &vapic_ops, s, "kvmvapic", 2);
  643. memory_region_add_subregion(get_system_io(), VAPIC_IO_PORT, &s->io);
  644. sysbus_init_ioports(sbd, VAPIC_IO_PORT, 2);
  645. option_rom[nb_option_roms].name = "kvmvapic.bin";
  646. option_rom[nb_option_roms].bootindex = -1;
  647. nb_option_roms++;
  648. }
  649. static void do_vapic_enable(CPUState *cs, run_on_cpu_data data)
  650. {
  651. VAPICROMState *s = data.host_ptr;
  652. X86CPU *cpu = X86_CPU(cs);
  653. static const uint8_t enabled = 1;
  654. cpu_physical_memory_write(s->vapic_paddr + offsetof(VAPICState, enabled),
  655. &enabled, sizeof(enabled));
  656. apic_enable_vapic(cpu->apic_state, s->vapic_paddr);
  657. s->state = VAPIC_ACTIVE;
  658. }
  659. static void vapic_vm_state_change(void *opaque, bool running, RunState state)
  660. {
  661. MachineState *ms = MACHINE(qdev_get_machine());
  662. VAPICROMState *s = opaque;
  663. uint8_t *zero;
  664. if (!running) {
  665. return;
  666. }
  667. if (s->state == VAPIC_ACTIVE) {
  668. if (ms->smp.cpus == 1) {
  669. run_on_cpu(first_cpu, do_vapic_enable, RUN_ON_CPU_HOST_PTR(s));
  670. } else {
  671. zero = g_malloc0(s->rom_state.vapic_size);
  672. cpu_physical_memory_write(s->vapic_paddr, zero,
  673. s->rom_state.vapic_size);
  674. g_free(zero);
  675. }
  676. }
  677. qemu_del_vm_change_state_handler(s->vmsentry);
  678. s->vmsentry = NULL;
  679. }
  680. static int vapic_post_load(void *opaque, int version_id)
  681. {
  682. VAPICROMState *s = opaque;
  683. /*
  684. * The old implementation of qemu-kvm did not provide the state
  685. * VAPIC_STANDBY. Reconstruct it.
  686. */
  687. if (s->state == VAPIC_INACTIVE && s->rom_state_paddr != 0) {
  688. s->state = VAPIC_STANDBY;
  689. }
  690. if (s->state != VAPIC_INACTIVE) {
  691. if (vapic_prepare(s) < 0) {
  692. return -1;
  693. }
  694. }
  695. if (!s->vmsentry) {
  696. s->vmsentry =
  697. qemu_add_vm_change_state_handler(vapic_vm_state_change, s);
  698. }
  699. return 0;
  700. }
  701. static const VMStateDescription vmstate_handlers = {
  702. .name = "kvmvapic-handlers",
  703. .version_id = 1,
  704. .minimum_version_id = 1,
  705. .fields = (const VMStateField[]) {
  706. VMSTATE_UINT32(set_tpr, VAPICHandlers),
  707. VMSTATE_UINT32(set_tpr_eax, VAPICHandlers),
  708. VMSTATE_UINT32_ARRAY(get_tpr, VAPICHandlers, 8),
  709. VMSTATE_UINT32(get_tpr_stack, VAPICHandlers),
  710. VMSTATE_END_OF_LIST()
  711. }
  712. };
  713. static const VMStateDescription vmstate_guest_rom = {
  714. .name = "kvmvapic-guest-rom",
  715. .version_id = 1,
  716. .minimum_version_id = 1,
  717. .fields = (const VMStateField[]) {
  718. VMSTATE_UNUSED(8), /* signature */
  719. VMSTATE_UINT32(vaddr, GuestROMState),
  720. VMSTATE_UINT32(fixup_start, GuestROMState),
  721. VMSTATE_UINT32(fixup_end, GuestROMState),
  722. VMSTATE_UINT32(vapic_vaddr, GuestROMState),
  723. VMSTATE_UINT32(vapic_size, GuestROMState),
  724. VMSTATE_UINT32(vcpu_shift, GuestROMState),
  725. VMSTATE_UINT32(real_tpr_addr, GuestROMState),
  726. VMSTATE_STRUCT(up, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
  727. VMSTATE_STRUCT(mp, GuestROMState, 0, vmstate_handlers, VAPICHandlers),
  728. VMSTATE_END_OF_LIST()
  729. }
  730. };
  731. static const VMStateDescription vmstate_vapic = {
  732. .name = "kvm-tpr-opt", /* compatible with qemu-kvm VAPIC */
  733. .version_id = 1,
  734. .minimum_version_id = 1,
  735. .post_load = vapic_post_load,
  736. .fields = (const VMStateField[]) {
  737. VMSTATE_STRUCT(rom_state, VAPICROMState, 0, vmstate_guest_rom,
  738. GuestROMState),
  739. VMSTATE_UINT32(state, VAPICROMState),
  740. VMSTATE_UINT32(real_tpr_addr, VAPICROMState),
  741. VMSTATE_UINT32(rom_state_vaddr, VAPICROMState),
  742. VMSTATE_UINT32(vapic_paddr, VAPICROMState),
  743. VMSTATE_UINT32(rom_state_paddr, VAPICROMState),
  744. VMSTATE_END_OF_LIST()
  745. }
  746. };
  747. static void vapic_class_init(ObjectClass *klass, void *data)
  748. {
  749. DeviceClass *dc = DEVICE_CLASS(klass);
  750. device_class_set_legacy_reset(dc, vapic_reset);
  751. dc->vmsd = &vmstate_vapic;
  752. dc->realize = vapic_realize;
  753. }
  754. static const TypeInfo vapic_type = {
  755. .name = TYPE_VAPIC,
  756. .parent = TYPE_SYS_BUS_DEVICE,
  757. .instance_size = sizeof(VAPICROMState),
  758. .class_init = vapic_class_init,
  759. };
  760. static void vapic_register(void)
  761. {
  762. type_register_static(&vapic_type);
  763. }
  764. type_init(vapic_register);