xen-hvm.c
/*
 * Copyright (C) 2010 Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"

#include "cpu.h"
#include "hw/pci/pci.h"
#include "hw/i386/pc.h"
#include "hw/i386/apic-msidef.h"
#include "hw/xen/xen_common.h"
#include "hw/xen/xen_backend.h"
#include "qmp-commands.h"

#include "sysemu/char.h"
#include "qemu/error-report.h"
#include "qemu/range.h"
#include "sysemu/xen-mapcache.h"
#include "trace-root.h"
#include "exec/address-spaces.h"

#include <xen/hvm/ioreq.h>
#include <xen/hvm/params.h>
#include <xen/hvm/e820.h>

//#define DEBUG_XEN_HVM

#ifdef DEBUG_XEN_HVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, "xen: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi;
static MemoryRegion *framebuffer;
static bool xen_in_migration;

/* Compatibility with older version */

/* This allows QEMU to build on a system that has Xen 4.5 or earlier
 * installed.  This is here (and not in hw/xen/xen_common.h) because
 * xen/hvm/ioreq.h needs to be included before this block and
 * hw/xen/xen_common.h needs to be included before xen/hvm/ioreq.h
 */
#ifndef IOREQ_TYPE_VMWARE_PORT
#define IOREQ_TYPE_VMWARE_PORT  3
struct vmware_regs {
    uint32_t esi;
    uint32_t edi;
    uint32_t ebx;
    uint32_t ecx;
    uint32_t edx;
};
typedef struct vmware_regs vmware_regs_t;

struct shared_vmport_iopage {
    struct vmware_regs vcpu_vmport_regs[1];
};
typedef struct shared_vmport_iopage shared_vmport_iopage_t;
#endif
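
/*
 * Note: although vcpu_vmport_regs is declared with a single element, the
 * shared vmport page holds one vmware_regs slot per vCPU; the code below
 * indexes it with state->send_vcpu.
 */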

static inline uint32_t xen_vcpu_eport(shared_iopage_t *shared_page, int i)
{
    return shared_page->vcpu_ioreq[i].vp_eport;
}
static inline ioreq_t *xen_vcpu_ioreq(shared_iopage_t *shared_page, int vcpu)
{
    return &shared_page->vcpu_ioreq[vcpu];
}

#define BUFFER_IO_MAX_DELAY  100

typedef struct XenPhysmap {
    hwaddr start_addr;
    ram_addr_t size;
    const char *name;
    hwaddr phys_offset;

    QLIST_ENTRY(XenPhysmap) list;
} XenPhysmap;

typedef struct XenIOState {
    ioservid_t ioservid;
    shared_iopage_t *shared_page;
    shared_vmport_iopage_t *shared_vmport_page;
    buffered_iopage_t *buffered_io_page;
    QEMUTimer *buffered_io_timer;
    CPUState **cpu_by_vcpu_id;
    /* the evtchn port for polling the notification */
    evtchn_port_t *ioreq_local_port;
    /* evtchn local port for buffered io */
    evtchn_port_t bufioreq_local_port;
    /* the evtchn fd for polling */
    xenevtchn_handle *xce_handle;
    /* which vcpu we are serving */
    int send_vcpu;

    struct xs_handle *xenstore;
    MemoryListener memory_listener;
    MemoryListener io_listener;
    DeviceListener device_listener;
    QLIST_HEAD(, XenPhysmap) physmap;
    hwaddr free_phys_offset;
    const XenPhysmap *log_for_dirtybit;

    Notifier exit;
    Notifier suspend;
    Notifier wakeup;
} XenIOState;

/* Xen specific function for piix pci */

int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
{
    return irq_num + ((pci_dev->devfn >> 3) << 2);
}

void xen_piix3_set_irq(void *opaque, int irq_num, int level)
{
    xc_hvm_set_pci_intx_level(xen_xc, xen_domid, 0, 0, irq_num >> 2,
                              irq_num & 3, level);
}
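
/*
 * Mirror guest writes to the PIIX3 PCI interrupt routing registers
 * (PIRQRC[A:D] at config offsets 0x60-0x63) into Xen so that the
 * hypervisor's view of the PCI link -> ISA IRQ routing stays in sync.
 * A value with bit 7 set means "routing disabled" and is passed as 0.
 */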
void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len)
{
    int i;

    /* Scan for updates to PCI link routes (0x60-0x63). */
    for (i = 0; i < len; i++) {
        uint8_t v = (val >> (8 * i)) & 0xff;
        if (v & 0x80) {
            v = 0;
        }
        v &= 0xf;
        if (((address + i) >= 0x60) && ((address + i) <= 0x63)) {
            xc_hvm_set_pci_link_route(xen_xc, xen_domid, address + i - 0x60, v);
        }
    }
}

int xen_is_pirq_msi(uint32_t msi_data)
{
    /* If vector is 0, the msi is remapped into a pirq, passed as
     * dest_id.
     */
    return ((msi_data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT) == 0;
}

void xen_hvm_inject_msi(uint64_t addr, uint32_t data)
{
    xc_hvm_inject_msi(xen_xc, xen_domid, addr, data);
}

static void xen_suspend_notifier(Notifier *notifier, void *data)
{
    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3);
}

/* Xen Interrupt Controller */

static void xen_set_irq(void *opaque, int irq, int level)
{
    xc_hvm_set_isa_irq_level(xen_xc, xen_domid, irq, level);
}

qemu_irq *xen_interrupt_controller_init(void)
{
    return qemu_allocate_irqs(xen_set_irq, NULL, 16);
}

/* Memory Ops */
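
/*
 * Worked example (illustrative): with ram_size = 6 GiB and no
 * max-ram-below-4g override, user_lowmem is clamped to HVM_BELOW_4G_RAM_END
 * (0xf0000000 in current Xen headers, i.e. 3.75 GiB), so below_4g_mem_size
 * is 3.75 GiB and above_4g_mem_size is 2.25 GiB.  The single "xen.ram"
 * block is then sized 4 GiB + 2.25 GiB so that the high alias can start at
 * 0x100000000.
 */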
static void xen_ram_init(PCMachineState *pcms,
                         ram_addr_t ram_size, MemoryRegion **ram_memory_p)
{
    MemoryRegion *sysmem = get_system_memory();
    ram_addr_t block_len;
    uint64_t user_lowmem = object_property_get_int(qdev_get_machine(),
                                                   PC_MACHINE_MAX_RAM_BELOW_4G,
                                                   &error_abort);

    /* Handle the machine opt max-ram-below-4g.  It is basically doing
     * min(xen limit, user limit).
     */
    if (!user_lowmem) {
        user_lowmem = HVM_BELOW_4G_RAM_END; /* default */
    }
    if (HVM_BELOW_4G_RAM_END <= user_lowmem) {
        user_lowmem = HVM_BELOW_4G_RAM_END;
    }

    if (ram_size >= user_lowmem) {
        pcms->above_4g_mem_size = ram_size - user_lowmem;
        pcms->below_4g_mem_size = user_lowmem;
    } else {
        pcms->above_4g_mem_size = 0;
        pcms->below_4g_mem_size = ram_size;
    }
    if (!pcms->above_4g_mem_size) {
        block_len = ram_size;
    } else {
        /*
         * Xen does not allocate the memory continuously; it keeps a
         * hole of the size computed above or passed in.
         */
        block_len = (1ULL << 32) + pcms->above_4g_mem_size;
    }
    memory_region_init_ram(&ram_memory, NULL, "xen.ram", block_len,
                           &error_fatal);
    *ram_memory_p = &ram_memory;
    vmstate_register_ram_global(&ram_memory);

    memory_region_init_alias(&ram_640k, NULL, "xen.ram.640k",
                             &ram_memory, 0, 0xa0000);
    memory_region_add_subregion(sysmem, 0, &ram_640k);
    /* Skip the VGA I/O memory space; it will be registered later by the
     * emulated VGA device.
     *
     * The area between 0xc0000 and 0x100000 will be used by SeaBIOS to load
     * the option ROMs, so it is registered here as RAM.
     */
    memory_region_init_alias(&ram_lo, NULL, "xen.ram.lo",
                             &ram_memory, 0xc0000,
                             pcms->below_4g_mem_size - 0xc0000);
    memory_region_add_subregion(sysmem, 0xc0000, &ram_lo);
    if (pcms->above_4g_mem_size > 0) {
        memory_region_init_alias(&ram_hi, NULL, "xen.ram.hi",
                                 &ram_memory, 0x100000000ULL,
                                 pcms->above_4g_mem_size);
        memory_region_add_subregion(sysmem, 0x100000000ULL, &ram_hi);
    }
}

void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
                   Error **errp)
{
    unsigned long nr_pfn;
    xen_pfn_t *pfn_list;
    int i;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* RAM already populated in Xen */
        fprintf(stderr, "%s: do not alloc "RAM_ADDR_FMT
                " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE\n",
                __func__, size, ram_addr);
        return;
    }

    if (mr == &ram_memory) {
        return;
    }

    trace_xen_ram_alloc(ram_addr, size);

    nr_pfn = size >> TARGET_PAGE_BITS;
    pfn_list = g_malloc(sizeof (*pfn_list) * nr_pfn);

    for (i = 0; i < nr_pfn; i++) {
        pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i;
    }

    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
        error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
                   ram_addr);
    }

    g_free(pfn_list);
}
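
/*
 * The physmap list records RAM regions (typically the VGA framebuffer)
 * that have been relocated in guest-physical space by xen_add_to_physmap().
 * It is also mirrored to xenstore so that a new QEMU instance can restore
 * the mapping after save/restore or migration (see xen_read_physmap()).
 */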
static XenPhysmap *get_physmapping(XenIOState *state,
                                   hwaddr start_addr, ram_addr_t size)
{
    XenPhysmap *physmap = NULL;

    start_addr &= TARGET_PAGE_MASK;

    QLIST_FOREACH(physmap, &state->physmap, list) {
        if (range_covers_byte(physmap->start_addr, physmap->size, start_addr)) {
            return physmap;
        }
    }
    return NULL;
}

static hwaddr xen_phys_offset_to_gaddr(hwaddr start_addr,
                                       ram_addr_t size, void *opaque)
{
    hwaddr addr = start_addr & TARGET_PAGE_MASK;
    XenIOState *xen_io_state = opaque;
    XenPhysmap *physmap = NULL;

    QLIST_FOREACH(physmap, &xen_io_state->physmap, list) {
        if (range_covers_byte(physmap->phys_offset, physmap->size, addr)) {
            return physmap->start_addr;
        }
    }

    return start_addr;
}
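
/*
 * Remap the guest pages backing @mr so that they appear at @start_addr in
 * the guest physical address space (one XENMAPSPACE_gmfn call per page),
 * pin the range write-back cacheable, and record the mapping both in the
 * local physmap list and under
 * /local/domain/0/device-model/<domid>/physmap/ in xenstore.
 */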
static int xen_add_to_physmap(XenIOState *state,
                              hwaddr start_addr,
                              ram_addr_t size,
                              MemoryRegion *mr,
                              hwaddr offset_within_region)
{
    unsigned long i = 0;
    int rc = 0;
    XenPhysmap *physmap = NULL;
    hwaddr pfn, start_gpfn;
    hwaddr phys_offset = memory_region_get_ram_addr(mr);
    char path[80], value[17];
    const char *mr_name;

    if (get_physmapping(state, start_addr, size)) {
        return 0;
    }
    if (size <= 0) {
        return -1;
    }

    /* Xen can only handle a single dirty log region for now and we want
     * the linear framebuffer to be that region.
     * Avoid tracking any region that is not videoram and avoid tracking
     * the legacy vga region. */
    if (mr == framebuffer && start_addr > 0xbffff) {
        goto go_physmap;
    }
    return -1;

go_physmap:
    DPRINTF("mapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx"\n",
            start_addr, start_addr + size);

    pfn = phys_offset >> TARGET_PAGE_BITS;
    start_gpfn = start_addr >> TARGET_PAGE_BITS;
    for (i = 0; i < size >> TARGET_PAGE_BITS; i++) {
        unsigned long idx = pfn + i;
        xen_pfn_t gpfn = start_gpfn + i;

        rc = xen_xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn);
        if (rc) {
            DPRINTF("add_to_physmap MFN %"PRI_xen_pfn" to PFN %"
                    PRI_xen_pfn" failed: %d (errno: %d)\n", idx, gpfn, rc, errno);
            return -rc;
        }
    }

    mr_name = memory_region_name(mr);

    physmap = g_malloc(sizeof (XenPhysmap));

    physmap->start_addr = start_addr;
    physmap->size = size;
    physmap->name = mr_name;
    physmap->phys_offset = phys_offset;

    QLIST_INSERT_HEAD(&state->physmap, physmap, list);

    xc_domain_pin_memory_cacheattr(xen_xc, xen_domid,
                                   start_addr >> TARGET_PAGE_BITS,
                                   (start_addr + size - 1) >> TARGET_PAGE_BITS,
                                   XEN_DOMCTL_MEM_CACHEATTR_WB);

    snprintf(path, sizeof(path),
             "/local/domain/0/device-model/%d/physmap/%"PRIx64"/start_addr",
             xen_domid, (uint64_t)phys_offset);
    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)start_addr);
    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
        return -1;
    }
    snprintf(path, sizeof(path),
             "/local/domain/0/device-model/%d/physmap/%"PRIx64"/size",
             xen_domid, (uint64_t)phys_offset);
    snprintf(value, sizeof(value), "%"PRIx64, (uint64_t)size);
    if (!xs_write(state->xenstore, 0, path, value, strlen(value))) {
        return -1;
    }
    if (mr_name) {
        snprintf(path, sizeof(path),
                 "/local/domain/0/device-model/%d/physmap/%"PRIx64"/name",
                 xen_domid, (uint64_t)phys_offset);
        if (!xs_write(state->xenstore, 0, path, mr_name, strlen(mr_name))) {
            return -1;
        }
    }

    return 0;
}
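
/*
 * Undo a physmap entry: remap the pages back so they appear at their
 * original phys_offset again and drop the entry from the local list
 * (the xenstore record is not removed here).
 */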
static int xen_remove_from_physmap(XenIOState *state,
                                   hwaddr start_addr,
                                   ram_addr_t size)
{
    unsigned long i = 0;
    int rc = 0;
    XenPhysmap *physmap = NULL;
    hwaddr phys_offset = 0;

    physmap = get_physmapping(state, start_addr, size);
    if (physmap == NULL) {
        return -1;
    }

    phys_offset = physmap->phys_offset;
    size = physmap->size;

    DPRINTF("unmapping vram to %"HWADDR_PRIx" - %"HWADDR_PRIx", at "
            "%"HWADDR_PRIx"\n", start_addr, start_addr + size, phys_offset);

    size >>= TARGET_PAGE_BITS;
    start_addr >>= TARGET_PAGE_BITS;
    phys_offset >>= TARGET_PAGE_BITS;
    for (i = 0; i < size; i++) {
        xen_pfn_t idx = start_addr + i;
        xen_pfn_t gpfn = phys_offset + i;

        rc = xen_xc_domain_add_to_physmap(xen_xc, xen_domid, XENMAPSPACE_gmfn, idx, gpfn);
        if (rc) {
            fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"
                    PRI_xen_pfn" failed: %d (errno: %d)\n", idx, gpfn, rc, errno);
            return -rc;
        }
    }

    QLIST_REMOVE(physmap, list);
    if (state->log_for_dirtybit == physmap) {
        state->log_for_dirtybit = NULL;
    }
    g_free(physmap);

    return 0;
}

static void xen_set_memory(struct MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool add)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);
    hwaddr start_addr = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    bool log_dirty = memory_region_is_logging(section->mr, DIRTY_MEMORY_VGA);
    hvmmem_type_t mem_type;

    if (section->mr == &ram_memory) {
        return;
    } else {
        if (add) {
            xen_map_memory_section(xen_xc, xen_domid, state->ioservid,
                                   section);
        } else {
            xen_unmap_memory_section(xen_xc, xen_domid, state->ioservid,
                                     section);
        }
    }

    if (!memory_region_is_ram(section->mr)) {
        return;
    }

    if (log_dirty != add) {
        return;
    }

    trace_xen_client_set_memory(start_addr, size, log_dirty);

    start_addr &= TARGET_PAGE_MASK;
    size = TARGET_PAGE_ALIGN(size);

    if (add) {
        if (!memory_region_is_rom(section->mr)) {
            xen_add_to_physmap(state, start_addr, size,
                               section->mr, section->offset_within_region);
        } else {
            mem_type = HVMMEM_ram_ro;
            if (xc_hvm_set_mem_type(xen_xc, xen_domid, mem_type,
                                    start_addr >> TARGET_PAGE_BITS,
                                    size >> TARGET_PAGE_BITS)) {
                DPRINTF("xc_hvm_set_mem_type error, addr: "TARGET_FMT_plx"\n",
                        start_addr);
            }
        }
    } else {
        if (xen_remove_from_physmap(state, start_addr, size) < 0) {
            DPRINTF("physmapping does not exist at "TARGET_FMT_plx"\n", start_addr);
        }
    }
}

static void xen_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    xen_set_memory(listener, section, true);
}

static void xen_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    xen_set_memory(listener, section, false);
    memory_region_unref(section->mr);
}

static void xen_io_add(MemoryListener *listener,
                       MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    memory_region_ref(mr);

    xen_map_io_section(xen_xc, xen_domid, state->ioservid, section);
}

static void xen_io_del(MemoryListener *listener,
                       MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    xen_unmap_io_section(xen_xc, xen_domid, state->ioservid, section);

    memory_region_unref(mr);
}

static void xen_device_realize(DeviceListener *listener,
                               DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);

        xen_map_pcidev(xen_xc, xen_domid, state->ioservid, pci_dev);
    }
}

static void xen_device_unrealize(DeviceListener *listener,
                                 DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);

        xen_unmap_pcidev(xen_xc, xen_domid, state->ioservid, pci_dev);
    }
}
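
/*
 * Pull the dirty-VRAM bitmap for the tracked region out of Xen and feed it
 * into QEMU's dirty memory tracking.  The bitmap has one bit per guest
 * page; the ctzl() loop below walks the set bits word by word and marks
 * the corresponding TARGET_PAGE_SIZE chunks of the framebuffer dirty.
 */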
static void xen_sync_dirty_bitmap(XenIOState *state,
                                  hwaddr start_addr,
                                  ram_addr_t size)
{
    hwaddr npages = size >> TARGET_PAGE_BITS;
    const int width = sizeof(unsigned long) * 8;
    unsigned long bitmap[DIV_ROUND_UP(npages, width)];
    int rc, i, j;
    const XenPhysmap *physmap = NULL;

    physmap = get_physmapping(state, start_addr, size);
    if (physmap == NULL) {
        /* not handled */
        return;
    }

    if (state->log_for_dirtybit == NULL) {
        state->log_for_dirtybit = physmap;
    } else if (state->log_for_dirtybit != physmap) {
        /* Only one range for dirty bitmap can be tracked. */
        return;
    }

    rc = xc_hvm_track_dirty_vram(xen_xc, xen_domid,
                                 start_addr >> TARGET_PAGE_BITS, npages,
                                 bitmap);
    if (rc < 0) {
#ifndef ENODATA
#define ENODATA  ENOENT
#endif
        if (errno == ENODATA) {
            memory_region_set_dirty(framebuffer, 0, size);
            DPRINTF("xen: track_dirty_vram failed (0x" TARGET_FMT_plx
                    ", 0x" TARGET_FMT_plx "): %s\n",
                    start_addr, start_addr + size, strerror(errno));
        }
        return;
    }

    for (i = 0; i < ARRAY_SIZE(bitmap); i++) {
        unsigned long map = bitmap[i];
        while (map != 0) {
            j = ctzl(map);
            map &= ~(1ul << j);
            memory_region_set_dirty(framebuffer,
                                    (i * width + j) * TARGET_PAGE_SIZE,
                                    TARGET_PAGE_SIZE);
        }
    }
}

static void xen_log_start(MemoryListener *listener,
                          MemoryRegionSection *section,
                          int old, int new)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    if (new & ~old & (1 << DIRTY_MEMORY_VGA)) {
        xen_sync_dirty_bitmap(state, section->offset_within_address_space,
                              int128_get64(section->size));
    }
}

static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section,
                         int old, int new)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    if (old & ~new & (1 << DIRTY_MEMORY_VGA)) {
        state->log_for_dirtybit = NULL;
        /* Disable dirty bit tracking */
        xc_hvm_track_dirty_vram(xen_xc, xen_domid, 0, 0, NULL);
    }
}

static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    xen_sync_dirty_bitmap(state, section->offset_within_address_space,
                          int128_get64(section->size));
}

static void xen_log_global_start(MemoryListener *listener)
{
    if (xen_enabled()) {
        xen_in_migration = true;
    }
}

static void xen_log_global_stop(MemoryListener *listener)
{
    xen_in_migration = false;
}

static MemoryListener xen_memory_listener = {
    .region_add = xen_region_add,
    .region_del = xen_region_del,
    .log_start = xen_log_start,
    .log_stop = xen_log_stop,
    .log_sync = xen_log_sync,
    .log_global_start = xen_log_global_start,
    .log_global_stop = xen_log_global_stop,
    .priority = 10,
};

static MemoryListener xen_io_listener = {
    .region_add = xen_io_add,
    .region_del = xen_io_del,
    .priority = 10,
};

static DeviceListener xen_device_listener = {
    .realize = xen_device_realize,
    .unrealize = xen_device_unrealize,
};
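
/*
 * An ioreq slot moves through STATE_IOREQ_READY (written by Xen) ->
 * STATE_IOREQ_INPROCESS (claimed below) -> STATE_IORESP_READY (set in
 * cpu_handle_ioreq() once emulation is done), with memory barriers
 * ordering the state updates against the request/response payload.
 */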
/* get the ioreq packet from shared memory */
static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
{
    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);

    if (req->state != STATE_IOREQ_READY) {
        DPRINTF("I/O request not ready: "
                "%x, ptr: %x, port: %"PRIx64", "
                "data: %"PRIx64", count: %u, size: %u\n",
                req->state, req->data_is_ptr, req->addr,
                req->data, req->count, req->size);
        return NULL;
    }

    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */

    req->state = STATE_IOREQ_INPROCESS;
    return req;
}

/*
 * Poll the event channels and return the pending ioreq for the vcpu that
 * signalled us, or NULL if there is nothing to do (or only buffered io).
 */
static ioreq_t *cpu_get_ioreq(XenIOState *state)
{
    int i;
    evtchn_port_t port;

    port = xenevtchn_pending(state->xce_handle);
    if (port == state->bufioreq_local_port) {
        timer_mod(state->buffered_io_timer,
                  BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
        return NULL;
    }

    if (port != -1) {
        for (i = 0; i < max_cpus; i++) {
            if (state->ioreq_local_port[i] == port) {
                break;
            }
        }

        if (i == max_cpus) {
            hw_error("Fatal error while trying to get io event!\n");
        }

        /* unmask the wanted port again */
        xenevtchn_unmask(state->xce_handle, port);

        /* get the io packet from shared memory */
        state->send_vcpu = i;
        return cpu_get_ioreq_from_shared_memory(state, i);
    }

    /* read error or read nothing */
    return NULL;
}

static uint32_t do_inp(uint32_t addr, unsigned long size)
{
    switch (size) {
        case 1:
            return cpu_inb(addr);
        case 2:
            return cpu_inw(addr);
        case 4:
            return cpu_inl(addr);
        default:
            hw_error("inp: bad size: %04x %lx", addr, size);
    }
}

static void do_outp(uint32_t addr,
                    unsigned long size, uint32_t val)
{
    switch (size) {
        case 1:
            return cpu_outb(addr, val);
        case 2:
            return cpu_outw(addr, val);
        case 4:
            return cpu_outl(addr, val);
        default:
            hw_error("outp: bad size: %04x %lx", addr, size);
    }
}

/*
 * Helper functions which read/write an object from/to physical guest
 * memory, as part of the implementation of an ioreq.
 *
 * Equivalent to
 *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
 *                          val, req->size, 0/1)
 * except without the integer overflow problems.
 */
static void rw_phys_req_item(hwaddr addr,
                             ioreq_t *req, uint32_t i, void *val, int rw)
{
    /* Do everything unsigned so overflow just results in a truncated result
     * and accesses to undesired parts of guest memory, which is up
     * to the guest */
    hwaddr offset = (hwaddr)req->size * i;
    if (req->df) {
        addr -= offset;
    } else {
        addr += offset;
    }
    cpu_physical_memory_rw(addr, val, req->size, rw);
}

static inline void read_phys_req_item(hwaddr addr,
                                      ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 0);
}
static inline void write_phys_req_item(hwaddr addr,
                                       ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 1);
}
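
/*
 * Port I/O requests: when data_is_ptr is set this is a rep ins/outs, so
 * req->data holds a guest-physical buffer address and req->count items of
 * req->size bytes are transferred; otherwise req->data carries the value
 * itself.
 */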
static void cpu_ioreq_pio(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
                        req->data, req->count, req->size);

    if (req->size > sizeof(uint32_t)) {
        hw_error("PIO: bad size (%u)", req->size);
    }

    if (req->dir == IOREQ_READ) {
        if (!req->data_is_ptr) {
            req->data = do_inp(req->addr, req->size);
            trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
                                         req->size);
        } else {
            uint32_t tmp;

            for (i = 0; i < req->count; i++) {
                tmp = do_inp(req->addr, req->size);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        }
    } else if (req->dir == IOREQ_WRITE) {
        if (!req->data_is_ptr) {
            trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
                                          req->size);
            do_outp(req->addr, req->size, req->data);
        } else {
            for (i = 0; i < req->count; i++) {
                uint32_t tmp = 0;

                read_phys_req_item(req->data, req, i, &tmp);
                do_outp(req->addr, req->size, tmp);
            }
        }
    }
}

static void cpu_ioreq_move(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
                         req->data, req->count, req->size);

    if (req->size > sizeof(req->data)) {
        hw_error("MMIO: bad size (%u)", req->size);
    }

    if (!req->data_is_ptr) {
        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &req->data);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                write_phys_req_item(req->addr, req, i, &req->data);
            }
        }
    } else {
        uint64_t tmp;

        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &tmp);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->data, req, i, &tmp);
                write_phys_req_item(req->addr, req, i, &tmp);
            }
        }
    }
}
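
/*
 * VMware backdoor (vmport) requests carry the guest's general purpose
 * registers in a separate shared page.  Copy them into the current CPU's
 * register file so the vmport device model can inspect them, then copy any
 * modifications back once the PIO has been emulated.
 */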
static void regs_to_cpu(vmware_regs_t *vmport_regs, ioreq_t *req)
{
    X86CPU *cpu;
    CPUX86State *env;

    cpu = X86_CPU(current_cpu);
    env = &cpu->env;
    env->regs[R_EAX] = req->data;
    env->regs[R_EBX] = vmport_regs->ebx;
    env->regs[R_ECX] = vmport_regs->ecx;
    env->regs[R_EDX] = vmport_regs->edx;
    env->regs[R_ESI] = vmport_regs->esi;
    env->regs[R_EDI] = vmport_regs->edi;
}

static void regs_from_cpu(vmware_regs_t *vmport_regs)
{
    X86CPU *cpu = X86_CPU(current_cpu);
    CPUX86State *env = &cpu->env;

    vmport_regs->ebx = env->regs[R_EBX];
    vmport_regs->ecx = env->regs[R_ECX];
    vmport_regs->edx = env->regs[R_EDX];
    vmport_regs->esi = env->regs[R_ESI];
    vmport_regs->edi = env->regs[R_EDI];
}

static void handle_vmport_ioreq(XenIOState *state, ioreq_t *req)
{
    vmware_regs_t *vmport_regs;

    assert(state->shared_vmport_page);
    vmport_regs =
        &state->shared_vmport_page->vcpu_vmport_regs[state->send_vcpu];
    QEMU_BUILD_BUG_ON(sizeof(*req) < sizeof(*vmport_regs));

    current_cpu = state->cpu_by_vcpu_id[state->send_vcpu];
    regs_to_cpu(vmport_regs, req);
    cpu_ioreq_pio(req);
    regs_from_cpu(vmport_regs);
    current_cpu = NULL;
}

static void handle_ioreq(XenIOState *state, ioreq_t *req)
{
    trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
                       req->addr, req->data, req->count, req->size);

    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
            (req->size < sizeof (target_ulong))) {
        req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
    }

    if (req->dir == IOREQ_WRITE)
        trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
                                 req->addr, req->data, req->count, req->size);

    switch (req->type) {
        case IOREQ_TYPE_PIO:
            cpu_ioreq_pio(req);
            break;
        case IOREQ_TYPE_COPY:
            cpu_ioreq_move(req);
            break;
        case IOREQ_TYPE_VMWARE_PORT:
            handle_vmport_ioreq(state, req);
            break;
        case IOREQ_TYPE_TIMEOFFSET:
            break;
        case IOREQ_TYPE_INVALIDATE:
            xen_invalidate_map_cache();
            break;
        case IOREQ_TYPE_PCI_CONFIG: {
            uint32_t sbdf = req->addr >> 32;
            uint32_t val;

            /* Fake a write to port 0xCF8 so that
             * the config space access will target the
             * correct device model.
             */
            val = (1u << 31) |
                  ((req->addr & 0x0f00) << 16) |
                  ((sbdf & 0xffff) << 8) |
                  (req->addr & 0xfc);
            do_outp(0xcf8, 4, val);

            /* Now issue the config space access via
             * port 0xCFC
             */
            req->addr = 0xcfc | (req->addr & 0x03);
            cpu_ioreq_pio(req);
            break;
        }
        default:
            hw_error("Invalid ioreq type 0x%x\n", req->type);
    }
    if (req->dir == IOREQ_READ) {
        trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
                                req->addr, req->data, req->count, req->size);
    }
}
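
/*
 * Drain the buffered ioreq ring, which Xen uses for posted writes that do
 * not need a synchronous reply (VGA memory writes, for instance).  A
 * 64-bit request is split across two consecutive slots, with the upper
 * 32 bits of data in the second slot, which is why the read pointer
 * advances by qw + 1 below.
 */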
static int handle_buffered_iopage(XenIOState *state)
{
    buffered_iopage_t *buf_page = state->buffered_io_page;
    buf_ioreq_t *buf_req = NULL;
    ioreq_t req;
    int qw;

    if (!buf_page) {
        return 0;
    }

    memset(&req, 0x00, sizeof(req));
    req.state = STATE_IOREQ_READY;
    req.count = 1;
    req.dir = IOREQ_WRITE;

    for (;;) {
        uint32_t rdptr = buf_page->read_pointer, wrptr;

        xen_rmb();
        wrptr = buf_page->write_pointer;
        xen_rmb();
        if (rdptr != buf_page->read_pointer) {
            continue;
        }
        if (rdptr == wrptr) {
            break;
        }
        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
        req.size = 1U << buf_req->size;
        req.addr = buf_req->addr;
        req.data = buf_req->data;
        req.type = buf_req->type;
        xen_rmb();
        qw = (req.size == 8);
        if (qw) {
            if (rdptr + 1 == wrptr) {
                hw_error("Incomplete quad word buffered ioreq");
            }
            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
                                           IOREQ_BUFFER_SLOT_NUM];
            req.data |= ((uint64_t)buf_req->data) << 32;
            xen_rmb();
        }

        handle_ioreq(state, &req);

        /* Only req.data may get updated by handle_ioreq(), albeit even that
         * should not happen as such data would never make it to the guest (we
         * can only usefully see writes here after all).
         */
        assert(req.state == STATE_IOREQ_READY);
        assert(req.count == 1);
        assert(req.dir == IOREQ_WRITE);
        assert(!req.data_is_ptr);

        atomic_add(&buf_page->read_pointer, qw + 1);
    }

    return req.count;
}

static void handle_buffered_io(void *opaque)
{
    XenIOState *state = opaque;

    if (handle_buffered_iopage(state)) {
        timer_mod(state->buffered_io_timer,
                  BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    } else {
        timer_del(state->buffered_io_timer);
        xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port);
    }
}

static void cpu_handle_ioreq(void *opaque)
{
    XenIOState *state = opaque;
    ioreq_t *req = cpu_get_ioreq(state);

    handle_buffered_iopage(state);
    if (req) {
        ioreq_t copy = *req;

        xen_rmb();
        handle_ioreq(state, &copy);
        req->data = copy.data;

        if (req->state != STATE_IOREQ_INPROCESS) {
            fprintf(stderr, "Badness in I/O request ... not in service?!: "
                    "%x, ptr: %x, port: %"PRIx64", "
                    "data: %"PRIx64", count: %u, size: %u, type: %u\n",
                    req->state, req->data_is_ptr, req->addr,
                    req->data, req->count, req->size, req->type);
            destroy_hvm_domain(false);
            return;
        }

        xen_wmb(); /* Update ioreq contents /then/ update state. */

        /*
         * We do this before we send the response so that the tools
         * have the opportunity to pick up on the reset before the
         * guest resumes and does a hlt with interrupts disabled which
         * causes Xen to powerdown the domain.
         */
        if (runstate_is_running()) {
            if (qemu_shutdown_requested_get()) {
                destroy_hvm_domain(false);
            }
            if (qemu_reset_requested_get()) {
                qemu_system_reset(VMRESET_REPORT);
                destroy_hvm_domain(true);
            }
        }

        req->state = STATE_IORESP_READY;
        xenevtchn_notify(state->xce_handle,
                         state->ioreq_local_port[state->send_vcpu]);
    }
}

static void xen_main_loop_prepare(XenIOState *state)
{
    int evtchn_fd = -1;

    if (state->xce_handle != NULL) {
        evtchn_fd = xenevtchn_fd(state->xce_handle);
    }

    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
                                            state);

    if (evtchn_fd != -1) {
        CPUState *cpu_state;

        DPRINTF("%s: Init cpu_by_vcpu_id\n", __func__);
        CPU_FOREACH(cpu_state) {
            DPRINTF("%s: cpu_by_vcpu_id[%d]=%p\n",
                    __func__, cpu_state->cpu_index, cpu_state);
            state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
        }
        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
    }
}

static void xen_hvm_change_state_handler(void *opaque, int running,
                                         RunState rstate)
{
    XenIOState *state = opaque;

    if (running) {
        xen_main_loop_prepare(state);
    }

    xen_set_ioreq_server_state(xen_xc, xen_domid,
                               state->ioservid,
                               (rstate == RUN_STATE_RUNNING));
}

static void xen_exit_notifier(Notifier *n, void *data)
{
    XenIOState *state = container_of(n, XenIOState, exit);

    xenevtchn_close(state->xce_handle);
    xs_daemon_close(state->xenstore);
}

static void xen_read_physmap(XenIOState *state)
{
    XenPhysmap *physmap = NULL;
    unsigned int len, num, i;
    char path[80], *value = NULL;
    char **entries = NULL;

    snprintf(path, sizeof(path),
             "/local/domain/0/device-model/%d/physmap", xen_domid);
    entries = xs_directory(state->xenstore, 0, path, &num);
    if (entries == NULL)
        return;

    for (i = 0; i < num; i++) {
        physmap = g_malloc(sizeof (XenPhysmap));
        physmap->phys_offset = strtoull(entries[i], NULL, 16);
        snprintf(path, sizeof(path),
                 "/local/domain/0/device-model/%d/physmap/%s/start_addr",
                 xen_domid, entries[i]);
        value = xs_read(state->xenstore, 0, path, &len);
        if (value == NULL) {
            g_free(physmap);
            continue;
        }
        physmap->start_addr = strtoull(value, NULL, 16);
        free(value);

        snprintf(path, sizeof(path),
                 "/local/domain/0/device-model/%d/physmap/%s/size",
                 xen_domid, entries[i]);
        value = xs_read(state->xenstore, 0, path, &len);
        if (value == NULL) {
            g_free(physmap);
            continue;
        }
        physmap->size = strtoull(value, NULL, 16);
        free(value);

        snprintf(path, sizeof(path),
                 "/local/domain/0/device-model/%d/physmap/%s/name",
                 xen_domid, entries[i]);
        physmap->name = xs_read(state->xenstore, 0, path, &len);

        QLIST_INSERT_HEAD(&state->physmap, physmap, list);
    }
    free(entries);
}

static void xen_wakeup_notifier(Notifier *notifier, void *data)
{
    xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 0);
}
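
/*
 * Top-level initialisation for an HVM guest: create and enable the ioreq
 * server, map the shared, buffered and (optionally) vmport ioreq pages,
 * bind one event channel per vCPU plus one for buffered io, set up the
 * Xen mapcache and RAM layout, and register the memory/io/device listeners
 * that keep the ioreq server in sync with QEMU's view of the machine.
 */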
void xen_hvm_init(PCMachineState *pcms, MemoryRegion **ram_memory)
{
    int i, rc;
    xen_pfn_t ioreq_pfn;
    xen_pfn_t bufioreq_pfn;
    evtchn_port_t bufioreq_evtchn;
    XenIOState *state;

    state = g_malloc0(sizeof (XenIOState));

    state->xce_handle = xenevtchn_open(NULL, 0);
    if (state->xce_handle == NULL) {
        perror("xen: event channel open");
        goto err;
    }

    state->xenstore = xs_daemon_open();
    if (state->xenstore == NULL) {
        perror("xen: xenstore open");
        goto err;
    }

    xen_create_ioreq_server(xen_xc, xen_domid, &state->ioservid);

    state->exit.notify = xen_exit_notifier;
    qemu_add_exit_notifier(&state->exit);

    state->suspend.notify = xen_suspend_notifier;
    qemu_register_suspend_notifier(&state->suspend);

    state->wakeup.notify = xen_wakeup_notifier;
    qemu_register_wakeup_notifier(&state->wakeup);

    rc = xen_get_ioreq_server_info(xen_xc, xen_domid, state->ioservid,
                                   &ioreq_pfn, &bufioreq_pfn,
                                   &bufioreq_evtchn);
    if (rc < 0) {
        error_report("failed to get ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        goto err;
    }

    DPRINTF("shared page at pfn %lx\n", ioreq_pfn);
    DPRINTF("buffered io page at pfn %lx\n", bufioreq_pfn);
    DPRINTF("buffered io evtchn is %x\n", bufioreq_evtchn);

    state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                              PROT_READ|PROT_WRITE,
                                              1, &ioreq_pfn, NULL);
    if (state->shared_page == NULL) {
        error_report("map shared IO page returned error %d handle=%p",
                     errno, xen_xc);
        goto err;
    }

    rc = xen_get_vmport_regs_pfn(xen_xc, xen_domid, &ioreq_pfn);
    if (!rc) {
        DPRINTF("shared vmport page at pfn %lx\n", ioreq_pfn);
        state->shared_vmport_page =
            xenforeignmemory_map(xen_fmem, xen_domid, PROT_READ|PROT_WRITE,
                                 1, &ioreq_pfn, NULL);
        if (state->shared_vmport_page == NULL) {
            error_report("map shared vmport IO page returned error %d handle=%p",
                         errno, xen_xc);
            goto err;
        }
    } else if (rc != -ENOSYS) {
        error_report("get vmport regs pfn returned error %d, rc=%d",
                     errno, rc);
        goto err;
    }

    state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                   PROT_READ|PROT_WRITE,
                                                   1, &bufioreq_pfn, NULL);
    if (state->buffered_io_page == NULL) {
        error_report("map buffered IO page returned error %d", errno);
        goto err;
    }

    /* Note: cpus is empty at this point in init */
    state->cpu_by_vcpu_id = g_malloc0(max_cpus * sizeof(CPUState *));

    rc = xen_set_ioreq_server_state(xen_xc, xen_domid, state->ioservid, true);
    if (rc < 0) {
        error_report("failed to enable ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        goto err;
    }

    state->ioreq_local_port = g_malloc0(max_cpus * sizeof (evtchn_port_t));

    /* FIXME: what if we overflow the page here? */
    for (i = 0; i < max_cpus; i++) {
        rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
                                        xen_vcpu_eport(state->shared_page, i));
        if (rc == -1) {
            error_report("shared evtchn %d bind error %d", i, errno);
            goto err;
        }
        state->ioreq_local_port[i] = rc;
    }

    rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
                                    bufioreq_evtchn);
    if (rc == -1) {
        error_report("buffered evtchn bind error %d", errno);
        goto err;
    }
    state->bufioreq_local_port = rc;

    /* Init RAM management */
    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
    xen_ram_init(pcms, ram_size, ram_memory);

    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);

    state->memory_listener = xen_memory_listener;
    QLIST_INIT(&state->physmap);
    memory_listener_register(&state->memory_listener, &address_space_memory);
    state->log_for_dirtybit = NULL;

    state->io_listener = xen_io_listener;
    memory_listener_register(&state->io_listener, &address_space_io);

    state->device_listener = xen_device_listener;
    device_listener_register(&state->device_listener);

    /* Initialize backend core & drivers */
    if (xen_be_init() != 0) {
        error_report("xen backend core setup failed");
        goto err;
    }
    xen_be_register_common();
    xen_read_physmap(state);

    /* Disable ACPI build because Xen handles it */
    pcms->acpi_build_enabled = false;

    return;

err:
    error_report("xen hardware virtual machine initialisation failed");
    exit(1);
}

void destroy_hvm_domain(bool reboot)
{
    xc_interface *xc_handle;
    int sts;

    xc_handle = xc_interface_open(0, 0, 0);
    if (xc_handle == NULL) {
        fprintf(stderr, "Cannot acquire xenctrl handle\n");
    } else {
        sts = xc_domain_shutdown(xc_handle, xen_domid,
                                 reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff);
        if (sts != 0) {
            fprintf(stderr, "xc_domain_shutdown failed to issue %s, "
                    "sts %d, %s\n", reboot ? "reboot" : "poweroff",
                    sts, strerror(errno));
        } else {
            fprintf(stderr, "Issued domain %d %s\n", xen_domid,
                    reboot ? "reboot" : "poweroff");
        }
        xc_interface_close(xc_handle);
    }
}

void xen_register_framebuffer(MemoryRegion *mr)
{
    framebuffer = mr;
}

void xen_shutdown_fatal_error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    fprintf(stderr, "Will destroy the domain.\n");
    /* destroy the domain */
    qemu_system_shutdown_request();
}

void xen_modified_memory(ram_addr_t start, ram_addr_t length)
{
    if (unlikely(xen_in_migration)) {
        int rc;
        ram_addr_t start_pfn, nb_pages;

        if (length == 0) {
            length = TARGET_PAGE_SIZE;
        }
        start_pfn = start >> TARGET_PAGE_BITS;
        nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS)
            - start_pfn;
        rc = xc_hvm_modified_memory(xen_xc, xen_domid, start_pfn, nb_pages);
        if (rc) {
            fprintf(stderr,
                    "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n",
                    __func__, start, nb_pages, rc, strerror(-rc));
        }
    }
}

void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
{
    if (enable) {
        memory_global_dirty_log_start();
    } else {
        memory_global_dirty_log_stop();
    }
}