/* xen-hvm-common.c */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "exec/target_page.h"
#include "trace.h"

#include "hw/pci/pci_host.h"
#include "hw/xen/xen-hvm-common.h"
#include "hw/xen/xen-bus.h"
#include "hw/boards.h"
#include "hw/xen/arch_hvm.h"

MemoryRegion xen_memory, xen_grants;

/* Check for any kind of xen memory, foreign mappings or grants. */
bool xen_mr_is_memory(MemoryRegion *mr)
{
    return mr == &xen_memory || mr == &xen_grants;
}

/* Check specifically for grants. */
bool xen_mr_is_grants(MemoryRegion *mr)
{
    return mr == &xen_grants;
}
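
/*
 * Populate guest RAM for a newly allocated region via
 * xc_domain_populate_physmap_exact(). Skipped on incoming migration
 * (the pages already exist in Xen) and for the Xen memory regions.
 */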
void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size, MemoryRegion *mr,
                   Error **errp)
{
    unsigned target_page_bits = qemu_target_page_bits();
    unsigned long nr_pfn;
    xen_pfn_t *pfn_list;
    int i;

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        /* RAM already populated in Xen */
        warn_report("%s: do not alloc "RAM_ADDR_FMT
                    " bytes of ram at "RAM_ADDR_FMT" when runstate is INMIGRATE",
                    __func__, size, ram_addr);
        return;
    }

    if (xen_mr_is_memory(mr)) {
        return;
    }

    trace_xen_ram_alloc(ram_addr, size);

    nr_pfn = size >> target_page_bits;
    pfn_list = g_new(xen_pfn_t, nr_pfn);

    for (i = 0; i < nr_pfn; i++) {
        pfn_list[i] = (ram_addr >> target_page_bits) + i;
    }

    if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
        error_setg(errp, "xen: failed to populate ram at " RAM_ADDR_FMT,
                   ram_addr);
    }

    g_free(pfn_list);
}
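
/* Tell the ioreq server about a memory section being added or removed. */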
static void xen_set_memory(struct MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool add)
{
    XenIOState *state = container_of(listener, XenIOState, memory_listener);

    if (xen_mr_is_memory(section->mr)) {
        return;
    } else {
        if (add) {
            xen_map_memory_section(xen_domid, state->ioservid,
                                   section);
        } else {
            xen_unmap_memory_section(xen_domid, state->ioservid,
                                     section);
        }
    }

    arch_xen_set_memory(state, section, add);
}

void xen_region_add(MemoryListener *listener,
                    MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    xen_set_memory(listener, section, true);
}

void xen_region_del(MemoryListener *listener,
                    MemoryRegionSection *section)
{
    xen_set_memory(listener, section, false);
    memory_region_unref(section->mr);
}

void xen_io_add(MemoryListener *listener,
                MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    memory_region_ref(mr);

    xen_map_io_section(xen_domid, state->ioservid, section);
}

void xen_io_del(MemoryListener *listener,
                MemoryRegionSection *section)
{
    XenIOState *state = container_of(listener, XenIOState, io_listener);
    MemoryRegion *mr = section->mr;

    if (mr->ops == &unassigned_io_ops) {
        return;
    }

    xen_unmap_io_section(xen_domid, state->ioservid, section);

    memory_region_unref(mr);
}

void xen_device_realize(DeviceListener *listener,
                        DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);
        XenPciDevice *xendev = g_new(XenPciDevice, 1);

        xendev->pci_dev = pci_dev;
        xendev->sbdf = PCI_BUILD_BDF(pci_dev_bus_num(pci_dev),
                                     pci_dev->devfn);
        QLIST_INSERT_HEAD(&state->dev_list, xendev, entry);

        xen_map_pcidev(xen_domid, state->ioservid, pci_dev);
    }
}

void xen_device_unrealize(DeviceListener *listener,
                          DeviceState *dev)
{
    XenIOState *state = container_of(listener, XenIOState, device_listener);

    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
        PCIDevice *pci_dev = PCI_DEVICE(dev);
        XenPciDevice *xendev, *next;

        xen_unmap_pcidev(xen_domid, state->ioservid, pci_dev);

        QLIST_FOREACH_SAFE(xendev, &state->dev_list, entry, next) {
            if (xendev->pci_dev == pci_dev) {
                QLIST_REMOVE(xendev, entry);
                g_free(xendev);
                break;
            }
        }
    }
}

MemoryListener xen_io_listener = {
    .name = "xen-io",
    .region_add = xen_io_add,
    .region_del = xen_io_del,
    .priority = MEMORY_LISTENER_PRIORITY_ACCEL,
};

DeviceListener xen_device_listener = {
    .realize = xen_device_realize,
    .unrealize = xen_device_unrealize,
};

/* Get the ioreq packet from shared memory. */
static ioreq_t *cpu_get_ioreq_from_shared_memory(XenIOState *state, int vcpu)
{
    ioreq_t *req = xen_vcpu_ioreq(state->shared_page, vcpu);

    if (req->state != STATE_IOREQ_READY) {
        trace_cpu_get_ioreq_from_shared_memory_req_not_ready(req->state,
                                                             req->data_is_ptr,
                                                             req->addr,
                                                             req->data,
                                                             req->count,
                                                             req->size);
        return NULL;
    }

    xen_rmb(); /* see IOREQ_READY /then/ read contents of ioreq */

    req->state = STATE_IOREQ_INPROCESS;
    return req;
}

/*
 * Use poll to get the port notification and return the pending ioreq
 * packet for that port, or NULL if there is nothing to handle.
 */
static ioreq_t *cpu_get_ioreq(XenIOState *state)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    int i;
    evtchn_port_t port;

    port = qemu_xen_evtchn_pending(state->xce_handle);
    if (port == state->bufioreq_local_port) {
        timer_mod(state->buffered_io_timer,
                  BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
        return NULL;
    }

    if (port != -1) {
        for (i = 0; i < max_cpus; i++) {
            if (state->ioreq_local_port[i] == port) {
                break;
            }
        }

        if (i == max_cpus) {
            hw_error("Fatal error while trying to get io event!\n");
        }

        /* unmask the wanted port again */
        qemu_xen_evtchn_unmask(state->xce_handle, port);

        /* get the io packet from shared memory */
        state->send_vcpu = i;
        return cpu_get_ioreq_from_shared_memory(state, i);
    }

    /* read error or read nothing */
    return NULL;
}
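
/* Port I/O accessors used to satisfy PIO ioreqs. */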
static uint32_t do_inp(uint32_t addr, unsigned long size)
{
    switch (size) {
    case 1:
        return cpu_inb(addr);
    case 2:
        return cpu_inw(addr);
    case 4:
        return cpu_inl(addr);
    default:
        hw_error("inp: bad size: %04x %lx", addr, size);
    }
}

static void do_outp(uint32_t addr,
                    unsigned long size, uint32_t val)
{
    switch (size) {
    case 1:
        return cpu_outb(addr, val);
    case 2:
        return cpu_outw(addr, val);
    case 4:
        return cpu_outl(addr, val);
    default:
        hw_error("outp: bad size: %04x %lx", addr, size);
    }
}

/*
 * Helper functions which read/write an object from/to physical guest
 * memory, as part of the implementation of an ioreq.
 *
 * Equivalent to
 *   cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
 *                          val, req->size, 0/1)
 * except without the integer overflow problems.
 */
static void rw_phys_req_item(hwaddr addr,
                             ioreq_t *req, uint32_t i, void *val, int rw)
{
    /*
     * Do everything unsigned so overflow just results in a truncated result
     * and accesses to undesired parts of guest memory, which is up
     * to the guest.
     */
    hwaddr offset = (hwaddr)req->size * i;
    if (req->df) {
        addr -= offset;
    } else {
        addr += offset;
    }
    cpu_physical_memory_rw(addr, val, req->size, rw);
}

static inline void read_phys_req_item(hwaddr addr,
                                      ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 0);
}

static inline void write_phys_req_item(hwaddr addr,
                                       ioreq_t *req, uint32_t i, void *val)
{
    rw_phys_req_item(addr, req, i, val, 1);
}
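
/*
 * Handle a PIO ioreq: a direct register access when the data is inline,
 * or a rep in/out through guest memory when data_is_ptr is set.
 */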
void cpu_ioreq_pio(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_pio(req, req->dir, req->df, req->data_is_ptr, req->addr,
                        req->data, req->count, req->size);

    if (req->size > sizeof(uint32_t)) {
        hw_error("PIO: bad size (%u)", req->size);
    }

    if (req->dir == IOREQ_READ) {
        if (!req->data_is_ptr) {
            req->data = do_inp(req->addr, req->size);
            trace_cpu_ioreq_pio_read_reg(req, req->data, req->addr,
                                         req->size);
        } else {
            uint32_t tmp;

            for (i = 0; i < req->count; i++) {
                tmp = do_inp(req->addr, req->size);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        }
    } else if (req->dir == IOREQ_WRITE) {
        if (!req->data_is_ptr) {
            trace_cpu_ioreq_pio_write_reg(req, req->data, req->addr,
                                          req->size);
            do_outp(req->addr, req->size, req->data);
        } else {
            for (i = 0; i < req->count; i++) {
                uint32_t tmp = 0;

                read_phys_req_item(req->data, req, i, &tmp);
                do_outp(req->addr, req->size, tmp);
            }
        }
    }
}
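
/*
 * Handle an MMIO (copy) ioreq: move data between the target address and
 * either the inline data field or a guest memory buffer.
 */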
static void cpu_ioreq_move(ioreq_t *req)
{
    uint32_t i;

    trace_cpu_ioreq_move(req, req->dir, req->df, req->data_is_ptr, req->addr,
                         req->data, req->count, req->size);

    if (req->size > sizeof(req->data)) {
        hw_error("MMIO: bad size (%u)", req->size);
    }

    if (!req->data_is_ptr) {
        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &req->data);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                write_phys_req_item(req->addr, req, i, &req->data);
            }
        }
    } else {
        uint64_t tmp;

        if (req->dir == IOREQ_READ) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->addr, req, i, &tmp);
                write_phys_req_item(req->data, req, i, &tmp);
            }
        } else if (req->dir == IOREQ_WRITE) {
            for (i = 0; i < req->count; i++) {
                read_phys_req_item(req->data, req, i, &tmp);
                write_phys_req_item(req->addr, req, i, &tmp);
            }
        }
    }
}
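
/*
 * Handle a PCI config space ioreq by forwarding it to the emulated device
 * matching the SBDF encoded in the request address.
 */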
static void cpu_ioreq_config(XenIOState *state, ioreq_t *req)
{
    uint32_t sbdf = req->addr >> 32;
    uint32_t reg = req->addr;
    XenPciDevice *xendev;

    if (req->size != sizeof(uint8_t) && req->size != sizeof(uint16_t) &&
        req->size != sizeof(uint32_t)) {
        hw_error("PCI config access: bad size (%u)", req->size);
    }

    if (req->count != 1) {
        hw_error("PCI config access: bad count (%u)", req->count);
    }

    QLIST_FOREACH(xendev, &state->dev_list, entry) {
        if (xendev->sbdf != sbdf) {
            continue;
        }

        if (!req->data_is_ptr) {
            if (req->dir == IOREQ_READ) {
                req->data = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, req->data);
            } else if (req->dir == IOREQ_WRITE) {
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, req->data);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->data, req->size);
            }
        } else {
            uint32_t tmp;

            if (req->dir == IOREQ_READ) {
                tmp = pci_host_config_read_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    req->size);
                trace_cpu_ioreq_config_read(req, xendev->sbdf, reg,
                                            req->size, tmp);
                write_phys_req_item(req->data, req, 0, &tmp);
            } else if (req->dir == IOREQ_WRITE) {
                read_phys_req_item(req->data, req, 0, &tmp);
                trace_cpu_ioreq_config_write(req, xendev->sbdf, reg,
                                             req->size, tmp);
                pci_host_config_write_common(
                    xendev->pci_dev, reg, PCI_CONFIG_SPACE_SIZE,
                    tmp, req->size);
            }
        }
    }
}
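
/* Dispatch a single ioreq to the appropriate handler based on its type. */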
static void handle_ioreq(XenIOState *state, ioreq_t *req)
{
    trace_handle_ioreq(req, req->type, req->dir, req->df, req->data_is_ptr,
                       req->addr, req->data, req->count, req->size);

    if (!req->data_is_ptr && (req->dir == IOREQ_WRITE) &&
        (req->size < sizeof(target_ulong))) {
        req->data &= ((target_ulong) 1 << (8 * req->size)) - 1;
    }

    if (req->dir == IOREQ_WRITE) {
        trace_handle_ioreq_write(req, req->type, req->df, req->data_is_ptr,
                                 req->addr, req->data, req->count, req->size);
    }

    switch (req->type) {
    case IOREQ_TYPE_PIO:
        cpu_ioreq_pio(req);
        break;
    case IOREQ_TYPE_COPY:
        cpu_ioreq_move(req);
        break;
    case IOREQ_TYPE_TIMEOFFSET:
        break;
    case IOREQ_TYPE_INVALIDATE:
        xen_invalidate_map_cache();
        break;
    case IOREQ_TYPE_PCI_CONFIG:
        cpu_ioreq_config(state, req);
        break;
    default:
        arch_handle_ioreq(state, req);
    }

    if (req->dir == IOREQ_READ) {
        trace_handle_ioreq_read(req, req->type, req->df, req->data_is_ptr,
                                req->addr, req->data, req->count, req->size);
    }
}
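
/*
 * Drain the buffered ioreq ring, returning the number of slots consumed
 * (quad-word requests occupy two slots).
 */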
static unsigned int handle_buffered_iopage(XenIOState *state)
{
    buffered_iopage_t *buf_page = state->buffered_io_page;
    buf_ioreq_t *buf_req = NULL;
    unsigned int handled = 0;
    ioreq_t req;
    int qw;

    if (!buf_page) {
        return 0;
    }

    memset(&req, 0x00, sizeof(req));
    req.state = STATE_IOREQ_READY;
    req.count = 1;
    req.dir = IOREQ_WRITE;

    do {
        uint32_t rdptr = buf_page->read_pointer, wrptr;

        xen_rmb();
        wrptr = buf_page->write_pointer;
        xen_rmb();
        if (rdptr != buf_page->read_pointer) {
            continue;
        }
        if (rdptr == wrptr) {
            break;
        }
        buf_req = &buf_page->buf_ioreq[rdptr % IOREQ_BUFFER_SLOT_NUM];
        req.size = 1U << buf_req->size;
        req.addr = buf_req->addr;
        req.data = buf_req->data;
        req.type = buf_req->type;
        xen_rmb();
        qw = (req.size == 8);
        if (qw) {
            if (rdptr + 1 == wrptr) {
                hw_error("Incomplete quad word buffered ioreq");
            }
            buf_req = &buf_page->buf_ioreq[(rdptr + 1) %
                                           IOREQ_BUFFER_SLOT_NUM];
            req.data |= ((uint64_t)buf_req->data) << 32;
            xen_rmb();
        }

        handle_ioreq(state, &req);

        /*
         * Only req.data may get updated by handle_ioreq(), albeit even that
         * should not happen as such data would never make it to the guest (we
         * can only usefully see writes here after all).
         */
        assert(req.state == STATE_IOREQ_READY);
        assert(req.count == 1);
        assert(req.dir == IOREQ_WRITE);
        assert(!req.data_is_ptr);

        qatomic_add(&buf_page->read_pointer, qw + 1);
        handled += qw + 1;
    } while (handled < IOREQ_BUFFER_SLOT_NUM);

    return handled;
}
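
/* Timer callback: process buffered ioreqs and re-arm or stop the timer. */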
static void handle_buffered_io(void *opaque)
{
    unsigned int handled;
    XenIOState *state = opaque;

    handled = handle_buffered_iopage(state);
    if (handled >= IOREQ_BUFFER_SLOT_NUM) {
        /*
         * We handled a full page of ioreqs. Schedule a timer to continue
         * processing while giving other stuff a chance to run.
         */
        timer_mod(state->buffered_io_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    } else if (handled == 0) {
        timer_del(state->buffered_io_timer);
        qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
    } else {
        timer_mod(state->buffered_io_timer,
                  BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }
}
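
/*
 * Event channel handler: fetch the pending ioreq for the signalling vcpu,
 * service it, and notify Xen that the response is ready.
 */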
static void cpu_handle_ioreq(void *opaque)
{
    XenIOState *state = opaque;
    ioreq_t *req = cpu_get_ioreq(state);

    handle_buffered_iopage(state);
    if (req) {
        ioreq_t copy = *req;

        xen_rmb();
        handle_ioreq(state, &copy);
        req->data = copy.data;

        if (req->state != STATE_IOREQ_INPROCESS) {
            warn_report("Badness in I/O request ... not in service?!: "
                        "%x, ptr: %x, port: %"PRIx64", "
                        "data: %"PRIx64", count: %u, size: %u, type: %u",
                        req->state, req->data_is_ptr, req->addr,
                        req->data, req->count, req->size, req->type);
            destroy_hvm_domain(false);
            return;
        }

        xen_wmb(); /* Update ioreq contents /then/ update state. */

        /*
         * We do this before we send the response so that the tools
         * have the opportunity to pick up on the reset before the
         * guest resumes and does a hlt with interrupts disabled which
         * causes Xen to powerdown the domain.
         */
        if (runstate_is_running()) {
            ShutdownCause request;

            if (qemu_shutdown_requested_get()) {
                destroy_hvm_domain(false);
            }
            request = qemu_reset_requested_get();
            if (request) {
                qemu_system_reset(request);
                destroy_hvm_domain(true);
            }
        }

        req->state = STATE_IORESP_READY;
        qemu_xen_evtchn_notify(state->xce_handle,
                               state->ioreq_local_port[state->send_vcpu]);
    }
}
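
/* Set up the buffered-io timer and hook the event channel fd handler. */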
static void xen_main_loop_prepare(XenIOState *state)
{
    int evtchn_fd = -1;

    if (state->xce_handle != NULL) {
        evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle);
    }

    state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
                                            state);

    if (evtchn_fd != -1) {
        CPUState *cpu_state;

        CPU_FOREACH(cpu_state) {
            trace_xen_main_loop_prepare_init_cpu(cpu_state->cpu_index,
                                                 cpu_state);
            state->cpu_by_vcpu_id[cpu_state->cpu_index] = cpu_state;
        }
        qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
    }
}

void xen_hvm_change_state_handler(void *opaque, bool running,
                                  RunState rstate)
{
    XenIOState *state = opaque;

    if (running) {
        xen_main_loop_prepare(state);
    }

    xen_set_ioreq_server_state(xen_domid,
                               state->ioservid,
                               running);
}

void xen_exit_notifier(Notifier *n, void *data)
{
    XenIOState *state = container_of(n, XenIOState, exit);

    xen_destroy_ioreq_server(xen_domid, state->ioservid);
    if (state->fres != NULL) {
        xenforeignmemory_unmap_resource(xen_fmem, state->fres);
    }

    qemu_xen_evtchn_close(state->xce_handle);
    xs_daemon_close(state->xenstore);
}
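
/*
 * Map the ioreq server's shared and (optionally) buffered ioreq pages,
 * preferring the resource mapping API with a fallback to foreign mapping.
 */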
static int xen_map_ioreq_server(XenIOState *state)
{
    void *addr = NULL;
    xen_pfn_t ioreq_pfn;
    xen_pfn_t bufioreq_pfn;
    evtchn_port_t bufioreq_evtchn;
    unsigned long num_frames = 1;
    unsigned long frame = 1;
    int rc;

    /*
     * Attempt to map using the resource API and fall back to normal
     * foreign mapping if this is not supported.
     */
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_bufioreq != 0);
    QEMU_BUILD_BUG_ON(XENMEM_resource_ioreq_server_frame_ioreq(0) != 1);

    if (state->has_bufioreq) {
        frame = 0;
        num_frames = 2;
    }
    state->fres = xenforeignmemory_map_resource(xen_fmem, xen_domid,
                                                XENMEM_resource_ioreq_server,
                                                state->ioservid,
                                                frame, num_frames,
                                                &addr,
                                                PROT_READ | PROT_WRITE, 0);
    if (state->fres != NULL) {
        trace_xen_map_resource_ioreq(state->ioservid, addr);
        state->shared_page = addr;
        if (state->has_bufioreq) {
            state->buffered_io_page = addr;
            state->shared_page = addr + XC_PAGE_SIZE;
        }
    } else if (errno != EOPNOTSUPP) {
        error_report("failed to map ioreq server resources: error %d handle=%p",
                     errno, xen_xc);
        return -1;
    }

    /*
     * If we fail to map the shared page with xenforeignmemory_map_resource()
     * or if we're using buffered ioreqs, we need xen_get_ioreq_server_info()
     * to provide the addresses to map the shared page and/or to get the
     * event-channel port for buffered ioreqs.
     */
    if (state->shared_page == NULL || state->has_bufioreq) {
        rc = xen_get_ioreq_server_info(xen_domid, state->ioservid,
                                       (state->shared_page == NULL) ?
                                       &ioreq_pfn : NULL,
                                       (state->has_bufioreq &&
                                        state->buffered_io_page == NULL) ?
                                       &bufioreq_pfn : NULL,
                                       &bufioreq_evtchn);
        if (rc < 0) {
            error_report("failed to get ioreq server info: error %d handle=%p",
                         errno, xen_xc);
            return rc;
        }

        if (state->shared_page == NULL) {
            trace_xen_map_ioreq_server_shared_page(ioreq_pfn);

            state->shared_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                      PROT_READ | PROT_WRITE,
                                                      1, &ioreq_pfn, NULL);
        }

        if (state->shared_page == NULL) {
            error_report("map shared IO page returned error %d handle=%p",
                         errno, xen_xc);
        }

        if (state->has_bufioreq && state->buffered_io_page == NULL) {
            trace_xen_map_ioreq_server_buffered_io_page(bufioreq_pfn);

            state->buffered_io_page = xenforeignmemory_map(xen_fmem, xen_domid,
                                                           PROT_READ | PROT_WRITE,
                                                           1, &bufioreq_pfn,
                                                           NULL);
            if (state->buffered_io_page == NULL) {
                error_report("map buffered IO page returned error %d", errno);
                return -1;
            }
        }
    }

    if (state->shared_page == NULL ||
        (state->has_bufioreq && state->buffered_io_page == NULL)) {
        return -1;
    }

    if (state->has_bufioreq) {
        trace_xen_map_ioreq_server_buffered_io_evtchn(bufioreq_evtchn);
        state->bufioreq_remote_port = bufioreq_evtchn;
    }

    return 0;
}
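
/*
 * Ask Xen to shut down the domain, preferring the device model API and
 * falling back to xc_domain_shutdown() on older Xen.
 */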
void destroy_hvm_domain(bool reboot)
{
    xc_interface *xc_handle;
    int sts;
    int rc;

    unsigned int reason = reboot ? SHUTDOWN_reboot : SHUTDOWN_poweroff;

    if (xen_dmod) {
        rc = xendevicemodel_shutdown(xen_dmod, xen_domid, reason);
        if (!rc) {
            return;
        }
        if (errno != ENOTTY /* old Xen */) {
            error_report("xendevicemodel_shutdown failed with error %d", errno);
        }
        /* well, try the old thing then */
    }

    xc_handle = xc_interface_open(0, 0, 0);
    if (xc_handle == NULL) {
        trace_destroy_hvm_domain_cannot_acquire_handle();
    } else {
        sts = xc_domain_shutdown(xc_handle, xen_domid, reason);
        if (sts != 0) {
            trace_destroy_hvm_domain_failed_action(
                reboot ? "reboot" : "poweroff", sts, strerror(errno)
            );
        } else {
            trace_destroy_hvm_domain_action(
                xen_domid, reboot ? "reboot" : "poweroff"
            );
        }
        xc_interface_close(xc_handle);
    }
}

void xen_shutdown_fatal_error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);
    error_report("Will destroy the domain.");
    /* destroy the domain */
    qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_ERROR);
}
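
/*
 * Map the ioreq server pages, bind the per-vcpu and buffered event channels,
 * and register the memory, I/O and device listeners.
 */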
static void xen_do_ioreq_register(XenIOState *state,
                                  unsigned int max_cpus,
                                  const MemoryListener *xen_memory_listener)
{
    int i, rc;

    state->exit.notify = xen_exit_notifier;
    qemu_add_exit_notifier(&state->exit);

    /*
     * Register wake-up support in QMP query-current-machine API
     */
    qemu_register_wakeup_support();

    rc = xen_map_ioreq_server(state);
    if (rc < 0) {
        goto err;
    }

    /* Note: cpus is empty at this point in init */
    state->cpu_by_vcpu_id = g_new0(CPUState *, max_cpus);

    rc = xen_set_ioreq_server_state(xen_domid, state->ioservid, true);
    if (rc < 0) {
        error_report("failed to enable ioreq server info: error %d handle=%p",
                     errno, xen_xc);
        goto err;
    }

    state->ioreq_local_port = g_new0(evtchn_port_t, max_cpus);

    /* FIXME: how about if we overflow the page here? */
    for (i = 0; i < max_cpus; i++) {
        rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                              xen_vcpu_eport(state->shared_page,
                                                             i));
        if (rc == -1) {
            error_report("shared evtchn %d bind error %d", i, errno);
            goto err;
        }
        state->ioreq_local_port[i] = rc;
    }

    if (state->has_bufioreq) {
        rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
                                              state->bufioreq_remote_port);
        if (rc == -1) {
            error_report("buffered evtchn bind error %d", errno);
            goto err;
        }
        state->bufioreq_local_port = rc;
    }

    /* Init RAM management */
#ifdef XEN_COMPAT_PHYSMAP
    xen_map_cache_init(xen_phys_offset_to_gaddr, state);
#else
    xen_map_cache_init(NULL, state);
#endif

    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);

    state->memory_listener = *xen_memory_listener;
    memory_listener_register(&state->memory_listener, &address_space_memory);

    state->io_listener = xen_io_listener;
    memory_listener_register(&state->io_listener, &address_space_io);

    state->device_listener = xen_device_listener;
    QLIST_INIT(&state->dev_list);
    device_listener_register(&state->device_listener);

    return;

err:
    error_report("xen hardware virtual machine initialisation failed");
    exit(1);
}
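
/*
 * Entry point for accelerator setup: open the event channel and xenstore
 * handles, create the ioreq server and register it, then initialise xen-bus.
 */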
void xen_register_ioreq(XenIOState *state, unsigned int max_cpus,
                        uint8_t handle_bufioreq,
                        const MemoryListener *xen_memory_listener)
{
    int rc;

    setup_xen_backend_ops();

    state->xce_handle = qemu_xen_evtchn_open();
    if (state->xce_handle == NULL) {
        error_report("xen: event channel open failed with error %d", errno);
        goto err;
    }

    state->xenstore = xs_daemon_open();
    if (state->xenstore == NULL) {
        error_report("xen: xenstore open failed with error %d", errno);
        goto err;
    }

    state->has_bufioreq = handle_bufioreq != HVM_IOREQSRV_BUFIOREQ_OFF;
    rc = xen_create_ioreq_server(xen_domid, handle_bufioreq, &state->ioservid);
    if (!rc) {
        xen_do_ioreq_register(state, max_cpus, xen_memory_listener);
    } else {
        warn_report("xen: failed to create ioreq server");
    }

    xen_bus_init();

    return;

err:
    error_report("xen hardware virtual machine backend registration failed");
    exit(1);
}