#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "exec/target_long.h"
#include "exec/target_page.h"
#include "trace.h"

#include "hw/hw.h"
#include "hw/pci/pci_host.h"
#include "hw/xen/xen-hvm-common.h"
#include "hw/xen/xen-bus.h"
#include "hw/boards.h"
#include "hw/xen/arch_hvm.h"

#include "system/runstate.h"
#include "system/system.h"
#include "system/xen.h"
#include "system/xen-mapcache.h"

MemoryRegion xen_memory, xen_grants;

/* Check for any kind of xen memory, foreign mappings or grants. */
bool xen_mr_is_memory(MemoryRegion *mr)
{
    return mr == &xen_memory || mr == &xen_grants;
}

/* Check specifically for grants. */
bool xen_mr_is_grants(MemoryRegion *mr)
{
    return mr == &xen_grants;
}

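/*
 * Ask Xen to populate the guest-physical frames backing a newly allocated
 * RAM region.  Skipped during incoming migration (the RAM is already
 * populated in Xen) and for the Xen memory/grant regions themselves.
 */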
void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
                   Error **errp)
{
    unsigned target_page_bits = qemu_target_page_bits();
    unsigned long nr_pfn;
    xen_pfn_t *pfn_list;
    int i;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* RAM already populated in Xen */
        warn_report("%s: do not alloc "RAM_ADDR_FMT
                " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE",
                __func__, size, ram_addr);
        return;
    }

    if (xen_mr_is_memory(mr)) {
        return;
    }

    trace_xen_ram_alloc(ram_addr, size);

    nr_pfn = size >> target_page_bits;
    pfn_list = g_new(xen_pfn_t, nr_pfn);

    for (i = 0; i < nr_pfn; i++) {
        pfn_list[i] = (ram_addr >> target_page_bits) + i;
    }

    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
        error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
                   ram_addr);
    }

    g_free(pfn_list);
}

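/*
 * Notify the ioreq server when a memory section (other than the Xen memory
 * regions themselves) is added to or removed from the guest address space,
 * then give the architecture-specific hook a chance to react.
 */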
static void xen_set_memory(struct MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool add)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    if (xen_mr_is_memory(section->mr)) {
        return;
    } else {
        if (add) {
            xen_map_memory_section(xen_domid, state->ioservid,
                                   section);
        } else {
            xen_unmap_memory_section(xen_domid, state->ioservid,
                                     section);
        }
    }

    arch_xen_set_memory(state, section, add);
}

void xen_region_add(MemoryListener *listener,
                    MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    xen_set_memory(listener, section, true);
}

void xen_region_del(MemoryListener *listener,
                    MemoryRegionSection *section)
{
    xen_set_memory(listener, section, false);
    memory_region_unref(section->mr);
}

void xen_io_add(MemoryListener *listener,
                MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    memory_region_ref(mr);

    xen_map_io_section(xen_domid, state->ioservid, section);
}

void xen_io_del(MemoryListener *listener,
                MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    xen_unmap_io_section(xen_domid, state->ioservid, section);

    memory_region_unref(mr);
}

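/*
 * Track PCI devices as they are realized/unrealized so that config-space
 * ioreqs can later be routed to the right device, and register/unregister
 * them with the ioreq server.
 */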
void xen_device_realize(DeviceListener *listener,
                        DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);
        XenPciDevice *xendev = g_new(XenPciDevice, 1);

        xendev->pci_dev = pci_dev;
        xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
                                     pci_dev->devfn);
        QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);

        xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
    }
}

void xen_device_unrealize(DeviceListener *listener,
                          DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);
        XenPciDevice *xendev, *next;

        xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);

        QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
            if (xendev->pci_dev == pci_dev) {
                QLIST_REMOVE(xendev, entry);
                g_free(xendev);
                break;
            }
        }
    }
}

MemoryListener xen_io_listener = {
    .name = "xen-io",
    .region_add = xen_io_add,
    .region_del = xen_io_del,
    .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
};

DeviceListener xen_device_listener = {
    .realize = xen_device_realize,
    .unrealize = xen_device_unrealize,
};

/* Get the next ioreq packet for a vCPU from the shared ioreq page. */
static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
{
    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);

    if (req->state != STATE_IOREQ_READY) {
        trace_cpu_get_ioreq_from_shared_memory_req_not_ready(req->state,
                                                             req->data_is_ptr,
                                                             req->addr,
                                                             req->data,
                                                             req->count,
                                                             req->size);
        return NULL;
    }

    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */

    req->state = STATE_IOREQ_INPROCESS;
    return req;
}

/*
 * Check the event channel for a pending notification, work out which vCPU
 * it belongs to and fetch that vCPU's ioreq from shared memory.  Returns
 * NULL if there is nothing to process (or only buffered ioreqs are pending).
 */
static ioreq_t *cpu_get_ioreq(XenIOState *state)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    int i;
    evtchn_port_t port;

    port = qemu_xen_evtchn_pending(state->xce_handle);
    if (port == state->bufioreq_local_port) {
        timer_mod(state->buffered_io_timer,
                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
        return NULL;
    }

    if (port != -1) {
        for (i = 0; i < max_cpus; i++) {
            if (state->ioreq_local_port[i] == port) {
                break;
            }
        }

        if (i == max_cpus) {
            hw_error("Fatal error while trying to get io event!\n");
        }

        /* unmask the wanted port again */
        qemu_xen_evtchn_unmask(state->xce_handle, port);

        /* get the io packet from shared memory */
        state->send_vcpu = i;
        return cpu_get_ioreq_from_shared_memory(state, i);
    }

    /* read error or read nothing */
    return NULL;
}

static uint32_t do_inp(uint32_t addr, unsigned long size)
{
    switch (size) {
    case 1:
        return cpu_inb(addr);
    case 2:
        return cpu_inw(addr);
    case 4:
        return cpu_inl(addr);
    default:
        hw_error("inp: bad size: %04x %lx", addr, size);
    }
}

static void do_outp(uint32_t addr,
                    unsigned long size, uint32_t val)
{
    switch (size) {
    case 1:
        return cpu_outb(addr, val);
    case 2:
        return cpu_outw(addr, val);
    case 4:
        return cpu_outl(addr, val);
    default:
        hw_error("outp: bad size: %04x %lx", addr, size);
    }
}

/*
 * Helper functions which read/write an object from/to physical guest
 * memory, as part of the implementation of an ioreq.
 *
 * Equivalent to
 *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
 *                          val, req->size, 0/1)
 * except without the integer overflow problems.
 */
static void rw_phys_req_item(hwaddr addr,
                             ioreq_t *req, uint32_t i, void *val, int rw)
{
    /* Do everything unsigned so overflow just results in a truncated result
     * and accesses to undesired parts of guest memory, which is up
     * to the guest */
    hwaddr offset = (hwaddr)req->size * i;
    if (req->df) {
        addr -= offset;
    } else {
        addr += offset;
    }
    cpu_physical_memory_rw(addr, val, req->size, rw);
}

static inline void read_phys_req_item(hwaddr addr,
                                      ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 0);
}

static inline void write_phys_req_item(hwaddr addr,
                                       ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 1);
}

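/*
 * Handle a port I/O request: either a single in/out on the register itself,
 * or, when data_is_ptr is set, a string operation that moves data between
 * the port and a buffer in guest memory.
 */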
void cpu_ioreq_pio(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
                        req->data, req->count, req->size);

    if (req->size > sizeof(uint32_t)) {
        hw_error("PIO: bad size (%u)", req->size);
    }

    if (req->dir == IOREQ_READ) {
        if (!req->data_is_ptr) {
            req->data = do_inp(req->addr, req->size);
            trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
                                         req->size);
        } else {
            uint32_t tmp;

            for (i = 0; i < req->count; i++) {
                tmp = do_inp(req->addr, req->size);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        }
    } else if (req->dir == IOREQ_WRITE) {
        if (!req->data_is_ptr) {
            trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
                                          req->size);
            do_outp(req->addr, req->size, req->data);
        } else {
            for (i = 0; i < req->count; i++) {
                uint32_t tmp = 0;

                read_phys_req_item(req->data, req, i, &tmp);
                do_outp(req->addr, req->size, tmp);
            }
        }
    }
}

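/*
 * Handle an MMIO (copy) request: move data between the target address and
 * either the request's data field or, when data_is_ptr is set, a buffer in
 * guest memory.
 */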
static void cpu_ioreq_move(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
                         req->data, req->count, req->size);

    if (req->size > sizeof(req->data)) {
        hw_error("MMIO: bad size (%u)", req->size);
    }

    if (!req->data_is_ptr) {
        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &req->data);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                write_phys_req_item(req->addr, req, i, &req->data);
            }
        }
    } else {
        uint64_t tmp;

        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &tmp);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->data, req, i, &tmp);
                write_phys_req_item(req->addr, req, i, &tmp);
            }
        }
    }
}

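/*
 * Handle a PCI config-space access.  The upper 32 bits of req->addr carry
 * the SBDF of the target device and the lower bits the register offset;
 * the access is forwarded to the matching device on the tracked device list.
 */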
static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
{
    uint32_t sbdf = req->addr >> 32;
    uint32_t reg = req->addr;
    XenPciDevice *xendev;

    if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
        req->size != sizeof(uint32_t)) {
        hw_error("PCI config access: bad size (%u)", req->size);
    }

    if (req->count != 1) {
        hw_error("PCI config access: bad count (%u)", req->count);
    }

    QLIST_FOREACH(xendev, &state->dev_list, entry) {
        if (xendev->sbdf != sbdf) {
            continue;
        }

        if (!req->data_is_ptr) {
            if (req->dir == IOREQ_READ) {
                req->data = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, req->data);
            } else if (req->dir == IOREQ_WRITE) {
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, req->data);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->data, req->size);
            }
        } else {
            uint32_t tmp;

            if (req->dir == IOREQ_READ) {
                tmp = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, tmp);
                write_phys_req_item(req->data, req, 0, &tmp);
            } else if (req->dir == IOREQ_WRITE) {
                read_phys_req_item(req->data, req, 0, &tmp);
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, tmp);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    tmp, req->size);
            }
        }
    }
}

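/*
 * Dispatch a single ioreq to the appropriate handler based on its type,
 * after masking write data down to the request size.
 */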
static void handle_ioreq(XenIOState *state, ioreq_t *req)
{
    trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
                       req->addr, req->data, req->count, req->size);

    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
            (req->size < sizeof (target_ulong))) {
        req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
    }

    if (req->dir == IOREQ_WRITE) {
        trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
                                 req->addr, req->data, req->count,
                                 req->size);
    }

    switch (req->type) {
    case IOREQ_TYPE_PIO:
        cpu_ioreq_pio(req);
        break;
    case IOREQ_TYPE_COPY:
        cpu_ioreq_move(req);
        break;
    case IOREQ_TYPE_TIMEOFFSET:
        break;
    case IOREQ_TYPE_INVALIDATE:
        xen_invalidate_map_cache();
        break;
    case IOREQ_TYPE_PCI_CONFIG:
        cpu_ioreq_config(state, req);
        break;
    default:
        arch_handle_ioreq(state, req);
    }

    if (req->dir == IOREQ_READ) {
        trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
                                req->addr, req->data, req->count,
                                req->size);
    }
}

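/*
 * Drain the ring of buffered (write-only) ioreqs.  Returns the number of
 * slots consumed, so the caller can tell whether a full page was processed
 * and more work may still be pending.
 */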
static unsigned int handle_buffered_iopage(XenIOState *state)
{
    buffered_iopage_t *buf_page = state->buffered_io_page;
    buf_ioreq_t *buf_req = NULL;
    unsigned int handled = 0;
    ioreq_t req;
    int qw;

    if (!buf_page) {
        return 0;
    }

    memset(&req, 0x00, sizeof(req));
    req.state = STATE_IOREQ_READY;
    req.count = 1;
    req.dir = IOREQ_WRITE;

    do {
        uint32_t rdptr = buf_page->read_pointer, wrptr;

        xen_rmb();
        wrptr = buf_page->write_pointer;
        xen_rmb();
        if (rdptr != buf_page->read_pointer) {
            continue;
        }
        if (rdptr == wrptr) {
            break;
        }
        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
        req.size = 1U << buf_req->size;
        req.addr = buf_req->addr;
        req.data = buf_req->data;
        req.type = buf_req->type;
        xen_rmb();
        qw = (req.size == 8);
        if (qw) {
            if (rdptr + 1 == wrptr) {
                hw_error("Incomplete quad word buffered ioreq");
            }
            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
                                           IOREQ_BUFFER_SLOT_NUM];
            req.data |= ((uint64_t)buf_req->data) << 32;
            xen_rmb();
        }

        handle_ioreq(state, &req);

        /* Only req.data may get updated by handle_ioreq(), albeit even that
         * should not happen as such data would never make it to the guest (we
         * can only usefully see writes here after all).
         */
        assert(req.state == STATE_IOREQ_READY);
        assert(req.count == 1);
        assert(req.dir == IOREQ_WRITE);
        assert(!req.data_is_ptr);

        qatomic_add(&buf_page->read_pointer, qw + 1);
        handled += qw + 1;
    } while (handled < IOREQ_BUFFER_SLOT_NUM);

    return handled;
}

static void handle_buffered_io(void *opaque)
{
    unsigned int handled;
    XenIOState *state = opaque;

    handled = handle_buffered_iopage(state);
    if (handled >= IOREQ_BUFFER_SLOT_NUM) {
        /* We handled a full page of ioreqs. Schedule a timer to continue
         * processing while giving other stuff a chance to run.
         */
        timer_mod(state->buffered_io_timer,
                qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    } else if (handled == 0) {
        timer_del(state->buffered_io_timer);
        qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
    } else {
        timer_mod(state->buffered_io_timer,
                BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }
}

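/*
 * Event-channel file-descriptor handler: process any buffered ioreqs, then
 * handle the pending synchronous ioreq for the notifying vCPU and send the
 * response back to Xen.
 */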
static void cpu_handle_ioreq(void *opaque)
{
    XenIOState *state = opaque;
    ioreq_t *req = cpu_get_ioreq(state);

    handle_buffered_iopage(state);
    if (req) {
        ioreq_t copy = *req;

        xen_rmb();
        handle_ioreq(state, &copy);
        req->data = copy.data;

        if (req->state != STATE_IOREQ_INPROCESS) {
            warn_report("Badness in I/O request ... not in service?!: "
                        "%x, ptr: %x, port: %"PRIx64", "
                        "data: %"PRIx64", count: %u, size: %u, type: %u",
                        req->state, req->data_is_ptr, req->addr,
                        req->data, req->count, req->size, req->type);
            destroy_hvm_domain(false);
            return;
        }

        xen_wmb(); /* Update ioreq contents /then/ update state. */

        /*
         * We do this before we send the response so that the tools
         * have the opportunity to pick up on the reset before the
         * guest resumes and does a hlt with interrupts disabled which
         * causes Xen to powerdown the domain.
         */
        if (runstate_is_running()) {
            ShutdownCause request;

            if (qemu_shutdown_requested_get()) {
                destroy_hvm_domain(false);
            }
            request = qemu_reset_requested_get();
            if (request) {
                qemu_system_reset(request);
                destroy_hvm_domain(true);
            }
        }

        req->state = STATE_IORESP_READY;
        qemu_xen_evtchn_notify(state->xce_handle,
                               state->ioreq_local_port[state->send_vcpu]);
    }
}

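/*
 * Set up the buffered-I/O timer, record the CPUState for each vCPU index
 * and hook the event-channel file descriptor into the main loop.
 */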
static void xen_main_loop_prepare(XenIOState *state)
{
    int evtchn_fd = -1;

    if (state->xce_handle != NULL) {
        evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle);
    }

    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
                                            state);

    if (evtchn_fd != -1) {
        CPUState *cpu_state;

        CPU_FOREACH(cpu_state) {
            trace_xen_main_loop_prepare_init_cpu(cpu_state->cpu_index,
                                                 cpu_state);
            state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
        }
        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
    }
}

void xen_hvm_change_state_handler(void *opaque, bool running,
                                  RunState rstate)
{
    XenIOState *state = opaque;

    if (running) {
        xen_main_loop_prepare(state);
    }

    xen_set_ioreq_server_state(xen_domid,
                               state->ioservid,
                               running);
}

void xen_exit_notifier(Notifier *n, void *data)
{
    XenIOState *state = container_of(n, XenIOState, exit);

    xen_destroy_ioreq_server(xen_domid, state->ioservid);
    if (state->fres != NULL) {
        xenforeignmemory_unmap_resource(xen_fmem, state->fres);
    }

    qemu_xen_evtchn_close(state->xce_handle);
    xs_daemon_close(state->xenstore);
}

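/*
 * Map the ioreq server's shared page and (optionally) buffered ioreq page
 * into QEMU, preferring the resource-mapping API and falling back to legacy
 * foreign mapping of the individual frames where necessary.
 */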
static int xen_map_ioreq_server(XenIOState *state)
{
    void *addr = NULL;
    xen_pfn_t ioreq_pfn;
    xen_pfn_t bufioreq_pfn;
    evtchn_port_t bufioreq_evtchn;
    unsigned long num_frames = 1;
    unsigned long frame = 1;
    int rc;

    /*
     * Attempt to map using the resource API and fall back to normal
     * foreign mapping if this is not supported.
     */
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);

    if (state->has_bufioreq) {
        frame = 0;
        num_frames = 2;
    }
    state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
                                                XENMEM_resource_ioreq_server,
                                                state->ioservid,
                                                frame, num_frames,
                                                &addr,
                                                PROT_READ | PROT_WRITE, 0);
    if (state->fres != NULL) {
        trace_xen_map_resource_ioreq(state->ioservid, addr);
        state->shared_page = addr;
        if (state->has_bufioreq) {
            state->buffered_io_page = addr;
            state->shared_page = addr + XC_PAGE_SIZE;
        }
    } else if (errno != EOPNOTSUPP) {
        error_report("failed to map ioreq server resources: error %d handle=%p",
                     errno, xen_xc);
        return -1;
    }

    /*
     * If we fail to map the shared page with xenforeignmemory_map_resource()
     * or if we're using buffered ioreqs, we need xen_get_ioreq_server_info()
     * to provide the addresses to map the shared page and/or to get the
     * event-channel port for buffered ioreqs.
     */
    if (state->shared_page == NULL || state->has_bufioreq) {
        rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
                                       (state->shared_page == NULL) ?
                                       &ioreq_pfn : NULL,
                                       (state->has_bufioreq &&
                                        state->buffered_io_page == NULL) ?
                                       &bufioreq_pfn : NULL,
                                       &bufioreq_evtchn);
        if (rc < 0) {
            error_report("failed to get ioreq server info: error %d handle=%p",
                         errno, xen_xc);
            return rc;
        }

        if (state->shared_page == NULL) {
            trace_xen_map_ioreq_server_shared_page(ioreq_pfn);

            state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                      PROT_READ | PROT_WRITE,
                                                      1, &ioreq_pfn, NULL);
        }
        if (state->shared_page == NULL) {
            error_report("map shared IO page returned error %d handle=%p",
                         errno, xen_xc);
        }

        if (state->has_bufioreq && state->buffered_io_page == NULL) {
            trace_xen_map_ioreq_server_buffered_io_page(bufioreq_pfn);

            state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                           PROT_READ | PROT_WRITE,
                                                           1, &bufioreq_pfn,
                                                           NULL);
            if (state->buffered_io_page == NULL) {
                error_report("map buffered IO page returned error %d", errno);
                return -1;
            }
        }
    }

    if (state->shared_page == NULL ||
        (state->has_bufioreq && state->buffered_io_page == NULL)) {
        return -1;
    }

    if (state->has_bufioreq) {
        trace_xen_map_ioreq_server_buffered_io_evtchn(bufioreq_evtchn);
        state->bufioreq_remote_port = bufioreq_evtchn;
    }

    return 0;
}

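/*
 * Ask Xen to power off (or reboot) the HVM domain, first via the device
 * model API and, if that is unavailable, via the legacy libxenctrl call.
 */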
void destroy_hvm_domain(bool reboot)
{
    xc_interface *xc_handle;
    int sts;
    int rc;

    unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;

    if (xen_dmod) {
        rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
        if (!rc) {
            return;
        }
        if (errno != ENOTTY /* old Xen */) {
            error_report("xendevicemodel_shutdown failed with error %d", errno);
        }
        /* well, try the old thing then */
    }

    xc_handle = xc_interface_open(0, 0, 0);
    if (xc_handle == NULL) {
        trace_destroy_hvm_domain_cannot_acquire_handle();
    } else {
        sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
        if (sts != 0) {
            trace_destroy_hvm_domain_failed_action(
                reboot ? "reboot" : "poweroff", sts, strerror(errno)
            );
        } else {
            trace_destroy_hvm_domain_action(
                xen_domid, reboot ? "reboot" : "poweroff"
            );
        }
        xc_interface_close(xc_handle);
    }
}

void xen_shutdown_fatal_error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);
    error_report("Will destroy the domain.");
    /* destroy the domain */
    qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
}

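/*
 * Second-stage ioreq server setup: map the ioreq pages, bind the per-vCPU
 * and buffered event channels, initialise the map cache and register the
 * memory, I/O and device listeners.
 */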
static void xen_do_ioreq_register(XenIOState *state,
                                  unsigned int max_cpus,
                                  const MemoryListener *xen_memory_listener)
{
    int i, rc;

    state->exit.notify = xen_exit_notifier;
    qemu_add_exit_notifier(&state->exit);

    /*
     * Register wake-up support in QMP query-current-machine API
     */
    qemu_register_wakeup_support();

    rc = xen_map_ioreq_server(state);
    if (rc < 0) {
        goto err;
    }

    /* Note: cpus is empty at this point in init */
    state->cpu_by_vcpu_id = g_new0(CPUState *, max_cpus);

    rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
    if (rc < 0) {
        error_report("failed to enable ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        goto err;
    }

    state->ioreq_local_port = g_new0(evtchn_port_t, max_cpus);

    /* FIXME: how about if we overflow the page here? */
    for (i = 0; i < max_cpus; i++) {
        rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                              xen_vcpu_eport(state->shared_page,
                                                             i));
        if (rc == -1) {
            error_report("shared evtchn %d bind error %d", i, errno);
            goto err;
        }
        state->ioreq_local_port[i] = rc;
    }

    if (state->has_bufioreq) {
        rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                              state->bufioreq_remote_port);
        if (rc == -1) {
            error_report("buffered evtchn bind error %d", errno);
            goto err;
        }
        state->bufioreq_local_port = rc;
    }

    /* Init RAM management */
#ifdef XEN_COMPAT_PHYSMAP
    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
#else
    xen_map_cache_init(NULL, state);
#endif

    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);

    state->memory_listener = *xen_memory_listener;
    memory_listener_register(&state->memory_listener, &address_space_memory);

    state->io_listener = xen_io_listener;
    memory_listener_register(&state->io_listener, &address_space_io);

    state->device_listener = xen_device_listener;
    QLIST_INIT(&state->dev_list);
    device_listener_register(&state->device_listener);

    return;

err:
    error_report("xen hardware virtual machine initialisation failed");
    exit(1);
}

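/*
 * Entry point for ioreq server registration: open the event-channel and
 * xenstore handles, create the ioreq server and, on success, complete the
 * setup via xen_do_ioreq_register().
 */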
void xen_register_ioreq(XenIOState *state, unsigned int max_cpus,
                        uint8_t handle_bufioreq,
                        const MemoryListener *xen_memory_listener)
{
    int rc;

    setup_xen_backend_ops();

    state->xce_handle = qemu_xen_evtchn_open();
    if (state->xce_handle == NULL) {
        error_report("xen: event channel open failed with error %d", errno);
        goto err;
    }

    state->xenstore = xs_daemon_open();
    if (state->xenstore == NULL) {
        error_report("xen: xenstore open failed with error %d", errno);
        goto err;
    }

    state->has_bufioreq = handle_bufioreq != HVM_IOREQSRV_BUFIOREQ_OFF;
    rc = xen_create_ioreq_server(xen_domid, handle_bufioreq, &state->ioservid);
    if (!rc) {
        xen_do_ioreq_register(state, max_cpus, xen_memory_listener);
    } else {
        warn_report("xen: failed to create ioreq server");
    }

    xen_bus_init();

    return;

err:
    error_report("xen hardware virtual machine backend registration failed");
    exit(1);
}