kvm-all.c 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037
  1. /*
  2. * QEMU KVM support
  3. *
  4. * Copyright IBM, Corp. 2008
  5. * Red Hat, Inc. 2008
  6. *
  7. * Authors:
  8. * Anthony Liguori <aliguori@us.ibm.com>
  9. * Glauber Costa <gcosta@redhat.com>
  10. *
  11. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  12. * See the COPYING file in the top-level directory.
  13. *
  14. */
  15. #include <sys/types.h>
  16. #include <sys/ioctl.h>
  17. #include <sys/mman.h>
  18. #include <stdarg.h>
  19. #include <linux/kvm.h>
  20. #include "qemu-common.h"
  21. #include "qemu/atomic.h"
  22. #include "qemu/option.h"
  23. #include "qemu/config-file.h"
  24. #include "sysemu/sysemu.h"
  25. #include "hw/hw.h"
  26. #include "hw/pci/msi.h"
  27. #include "exec/gdbstub.h"
  28. #include "sysemu/kvm.h"
  29. #include "qemu/bswap.h"
  30. #include "exec/memory.h"
  31. #include "exec/address-spaces.h"
  32. #include "qemu/event_notifier.h"
  33. /* This check must be after config-host.h is included */
  34. #ifdef CONFIG_EVENTFD
  35. #include <sys/eventfd.h>
  36. #endif
  37. #ifdef CONFIG_VALGRIND_H
  38. #include <valgrind/memcheck.h>
  39. #endif
  /* KVM's definition of COALESCED_MMIO_MAX is written in terms of PAGE_SIZE */
  #define PAGE_SIZE TARGET_PAGE_SIZE

  /* Uncomment to send DPRINTF() output to stderr */
  //#define DEBUG_KVM

  #ifdef DEBUG_KVM
  #define DPRINTF(fmt, ...) \
      do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  #else
  #define DPRINTF(fmt, ...) \
      do { } while (0)
  #endif

  /* Number of buckets in KVMState.msi_hashtab */
  #define KVM_MSI_HASHTAB_SIZE 256
  51. typedef struct KVMSlot
  52. {
  53. hwaddr start_addr;
  54. ram_addr_t memory_size;
  55. void *ram;
  56. int slot;
  57. int flags;
  58. } KVMSlot;
  59. typedef struct kvm_dirty_log KVMDirtyLog;
  60. struct KVMState
  61. {
  62. KVMSlot slots[32];
  63. int fd;
  64. int vmfd;
  65. int coalesced_mmio;
  66. struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
  67. bool coalesced_flush_in_progress;
  68. int broken_set_mem_region;
  69. int migration_log;
  70. int vcpu_events;
  71. int robust_singlestep;
  72. int debugregs;
  73. #ifdef KVM_CAP_SET_GUEST_DEBUG
  74. struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
  75. #endif
  76. int pit_state2;
  77. int xsave, xcrs;
  78. int many_ioeventfds;
  79. int intx_set_mask;
  80. /* The man page (and posix) say ioctl numbers are signed int, but
  81. * they're not. Linux, glibc and *BSD all treat ioctl numbers as
  82. * unsigned, and treating them as signed here can break things */
  83. unsigned irq_set_ioctl;
  84. #ifdef KVM_CAP_IRQ_ROUTING
  85. struct kvm_irq_routing *irq_routes;
  86. int nr_allocated_irq_routes;
  87. uint32_t *used_gsi_bitmap;
  88. unsigned int gsi_count;
  89. QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
  90. bool direct_msi;
  91. #endif
  92. };
  93. KVMState *kvm_state;
  94. bool kvm_kernel_irqchip;
  95. bool kvm_async_interrupts_allowed;
  96. bool kvm_irqfds_allowed;
  97. bool kvm_msi_via_irqfd_allowed;
  98. bool kvm_gsi_routing_allowed;
  99. static const KVMCapabilityInfo kvm_required_capabilites[] = {
  100. KVM_CAP_INFO(USER_MEMORY),
  101. KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
  102. KVM_CAP_LAST_INFO
  103. };
  104. static KVMSlot *kvm_alloc_slot(KVMState *s)
  105. {
  106. int i;
  107. for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
  108. if (s->slots[i].memory_size == 0) {
  109. return &s->slots[i];
  110. }
  111. }
  112. fprintf(stderr, "%s: no free slot available\n", __func__);
  113. abort();
  114. }
  115. static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
  116. hwaddr start_addr,
  117. hwaddr end_addr)
  118. {
  119. int i;
  120. for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
  121. KVMSlot *mem = &s->slots[i];
  122. if (start_addr == mem->start_addr &&
  123. end_addr == mem->start_addr + mem->memory_size) {
  124. return mem;
  125. }
  126. }
  127. return NULL;
  128. }
  129. /*
  130. * Find overlapping slot with lowest start address
  131. */
  132. static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
  133. hwaddr start_addr,
  134. hwaddr end_addr)
  135. {
  136. KVMSlot *found = NULL;
  137. int i;
  138. for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
  139. KVMSlot *mem = &s->slots[i];
  140. if (mem->memory_size == 0 ||
  141. (found && found->start_addr < mem->start_addr)) {
  142. continue;
  143. }
  144. if (end_addr > mem->start_addr &&
  145. start_addr < mem->start_addr + mem->memory_size) {
  146. found = mem;
  147. }
  148. }
  149. return found;
  150. }
  151. int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
  152. hwaddr *phys_addr)
  153. {
  154. int i;
  155. for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
  156. KVMSlot *mem = &s->slots[i];
  157. if (ram >= mem->ram && ram < mem->ram + mem->memory_size) {
  158. *phys_addr = mem->start_addr + (ram - mem->ram);
  159. return 1;
  160. }
  161. }
  162. return 0;
  163. }
  164. static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
  165. {
  166. struct kvm_userspace_memory_region mem;
  167. mem.slot = slot->slot;
  168. mem.guest_phys_addr = slot->start_addr;
  169. mem.memory_size = slot->memory_size;
  170. mem.userspace_addr = (unsigned long)slot->ram;
  171. mem.flags = slot->flags;
  172. if (s->migration_log) {
  173. mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
  174. }
  175. return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
  176. }
  177. static void kvm_reset_vcpu(void *opaque)
  178. {
  179. CPUState *cpu = opaque;
  180. kvm_arch_reset_vcpu(cpu);
  181. }
  182. int kvm_init_vcpu(CPUState *cpu)
  183. {
  184. KVMState *s = kvm_state;
  185. long mmap_size;
  186. int ret;
  187. DPRINTF("kvm_init_vcpu\n");
  188. ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)kvm_arch_vcpu_id(cpu));
  189. if (ret < 0) {
  190. DPRINTF("kvm_create_vcpu failed\n");
  191. goto err;
  192. }
  193. cpu->kvm_fd = ret;
  194. cpu->kvm_state = s;
  195. cpu->kvm_vcpu_dirty = true;
  196. mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
  197. if (mmap_size < 0) {
  198. ret = mmap_size;
  199. DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
  200. goto err;
  201. }
  202. cpu->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
  203. cpu->kvm_fd, 0);
  204. if (cpu->kvm_run == MAP_FAILED) {
  205. ret = -errno;
  206. DPRINTF("mmap'ing vcpu state failed\n");
  207. goto err;
  208. }
  209. if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
  210. s->coalesced_mmio_ring =
  211. (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
  212. }
  213. ret = kvm_arch_init_vcpu(cpu);
  214. if (ret == 0) {
  215. qemu_register_reset(kvm_reset_vcpu, cpu);
  216. kvm_arch_reset_vcpu(cpu);
  217. }
  218. err:
  219. return ret;
  220. }
  221. /*
  222. * dirty pages logging control
  223. */
  224. static int kvm_mem_flags(KVMState *s, bool log_dirty)
  225. {
  226. return log_dirty ? KVM_MEM_LOG_DIRTY_PAGES : 0;
  227. }
  228. static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
  229. {
  230. KVMState *s = kvm_state;
  231. int flags, mask = KVM_MEM_LOG_DIRTY_PAGES;
  232. int old_flags;
  233. old_flags = mem->flags;
  234. flags = (mem->flags & ~mask) | kvm_mem_flags(s, log_dirty);
  235. mem->flags = flags;
  236. /* If nothing changed effectively, no need to issue ioctl */
  237. if (s->migration_log) {
  238. flags |= KVM_MEM_LOG_DIRTY_PAGES;
  239. }
  240. if (flags == old_flags) {
  241. return 0;
  242. }
  243. return kvm_set_user_memory_region(s, mem);
  244. }
  245. static int kvm_dirty_pages_log_change(hwaddr phys_addr,
  246. ram_addr_t size, bool log_dirty)
  247. {
  248. KVMState *s = kvm_state;
  249. KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
  250. if (mem == NULL) {
  251. fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
  252. TARGET_FMT_plx "\n", __func__, phys_addr,
  253. (hwaddr)(phys_addr + size - 1));
  254. return -EINVAL;
  255. }
  256. return kvm_slot_dirty_pages_log_change(mem, log_dirty);
  257. }
  258. static void kvm_log_start(MemoryListener *listener,
  259. MemoryRegionSection *section)
  260. {
  261. int r;
  262. r = kvm_dirty_pages_log_change(section->offset_within_address_space,
  263. section->size, true);
  264. if (r < 0) {
  265. abort();
  266. }
  267. }
  268. static void kvm_log_stop(MemoryListener *listener,
  269. MemoryRegionSection *section)
  270. {
  271. int r;
  272. r = kvm_dirty_pages_log_change(section->offset_within_address_space,
  273. section->size, false);
  274. if (r < 0) {
  275. abort();
  276. }
  277. }
  278. static int kvm_set_migration_log(int enable)
  279. {
  280. KVMState *s = kvm_state;
  281. KVMSlot *mem;
  282. int i, err;
  283. s->migration_log = enable;
  284. for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
  285. mem = &s->slots[i];
  286. if (!mem->memory_size) {
  287. continue;
  288. }
  289. if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
  290. continue;
  291. }
  292. err = kvm_set_user_memory_region(s, mem);
  293. if (err) {
  294. return err;
  295. }
  296. }
  297. return 0;
  298. }
  299. /* get kvm's dirty pages bitmap and update qemu's */
  300. static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
  301. unsigned long *bitmap)
  302. {
  303. unsigned int i, j;
  304. unsigned long page_number, c;
  305. hwaddr addr, addr1;
  306. unsigned int len = ((section->size / getpagesize()) + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
  307. unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
  308. /*
  309. * bitmap-traveling is faster than memory-traveling (for addr...)
  310. * especially when most of the memory is not dirty.
  311. */
  312. for (i = 0; i < len; i++) {
  313. if (bitmap[i] != 0) {
  314. c = leul_to_cpu(bitmap[i]);
  315. do {
  316. j = ffsl(c) - 1;
  317. c &= ~(1ul << j);
  318. page_number = (i * HOST_LONG_BITS + j) * hpratio;
  319. addr1 = page_number * TARGET_PAGE_SIZE;
  320. addr = section->offset_within_region + addr1;
  321. memory_region_set_dirty(section->mr, addr,
  322. TARGET_PAGE_SIZE * hpratio);
  323. } while (c != 0);
  324. }
  325. }
  326. return 0;
  327. }
  /* Round x up to the next multiple of y; y must be a power of two. */
  #define ALIGN(x, y) \
      (((x)+(y)-1) & ~((y)-1))
  329. /**
  330. * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
  331. * This function updates qemu's dirty bitmap using
  332. * memory_region_set_dirty(). This means all bits are set
  333. * to dirty.
  334. *
  335. * @start_add: start of logged region.
  336. * @end_addr: end of logged region.
  337. */
  338. static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
  339. {
  340. KVMState *s = kvm_state;
  341. unsigned long size, allocated_size = 0;
  342. KVMDirtyLog d;
  343. KVMSlot *mem;
  344. int ret = 0;
  345. hwaddr start_addr = section->offset_within_address_space;
  346. hwaddr end_addr = start_addr + section->size;
  347. d.dirty_bitmap = NULL;
  348. while (start_addr < end_addr) {
  349. mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
  350. if (mem == NULL) {
  351. break;
  352. }
  353. /* XXX bad kernel interface alert
  354. * For dirty bitmap, kernel allocates array of size aligned to
  355. * bits-per-long. But for case when the kernel is 64bits and
  356. * the userspace is 32bits, userspace can't align to the same
  357. * bits-per-long, since sizeof(long) is different between kernel
  358. * and user space. This way, userspace will provide buffer which
  359. * may be 4 bytes less than the kernel will use, resulting in
  360. * userspace memory corruption (which is not detectable by valgrind
  361. * too, in most cases).
  362. * So for now, let's align to 64 instead of HOST_LONG_BITS here, in
  363. * a hope that sizeof(long) wont become >8 any time soon.
  364. */
  365. size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
  366. /*HOST_LONG_BITS*/ 64) / 8;
  367. if (!d.dirty_bitmap) {
  368. d.dirty_bitmap = g_malloc(size);
  369. } else if (size > allocated_size) {
  370. d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
  371. }
  372. allocated_size = size;
  373. memset(d.dirty_bitmap, 0, allocated_size);
  374. d.slot = mem->slot;
  375. if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
  376. DPRINTF("ioctl failed %d\n", errno);
  377. ret = -1;
  378. break;
  379. }
  380. kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
  381. start_addr = mem->start_addr + mem->memory_size;
  382. }
  383. g_free(d.dirty_bitmap);
  384. return ret;
  385. }
  386. static void kvm_coalesce_mmio_region(MemoryListener *listener,
  387. MemoryRegionSection *secion,
  388. hwaddr start, hwaddr size)
  389. {
  390. KVMState *s = kvm_state;
  391. if (s->coalesced_mmio) {
  392. struct kvm_coalesced_mmio_zone zone;
  393. zone.addr = start;
  394. zone.size = size;
  395. zone.pad = 0;
  396. (void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
  397. }
  398. }
  399. static void kvm_uncoalesce_mmio_region(MemoryListener *listener,
  400. MemoryRegionSection *secion,
  401. hwaddr start, hwaddr size)
  402. {
  403. KVMState *s = kvm_state;
  404. if (s->coalesced_mmio) {
  405. struct kvm_coalesced_mmio_zone zone;
  406. zone.addr = start;
  407. zone.size = size;
  408. zone.pad = 0;
  409. (void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
  410. }
  411. }
  412. int kvm_check_extension(KVMState *s, unsigned int extension)
  413. {
  414. int ret;
  415. ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
  416. if (ret < 0) {
  417. ret = 0;
  418. }
  419. return ret;
  420. }
  /*
   * Probe whether the host accepts more than six ioeventfds.
   *
   * Returns 1 when seven ioeventfds could be registered at once, 0 otherwise
   * (always 0 without CONFIG_EVENTFD). All probe descriptors are removed and
   * closed again before returning.
   */
  static int kvm_check_many_ioeventfds(void)
  {
      /* Userspace can use ioeventfd for io notification. This requires a host
       * that supports eventfd(2) and an I/O thread; since eventfd does not
       * support SIGIO it cannot interrupt the vcpu.
       *
       * Older kernels have a 6 device limit on the KVM io bus. Find out so we
       * can avoid creating too many ioeventfds.
       */
  #if defined(CONFIG_EVENTFD)
      int probe_fds[7];
      int count = 0;
      int many;

      while (count < ARRAY_SIZE(probe_fds)) {
          probe_fds[count] = eventfd(0, EFD_CLOEXEC);
          if (probe_fds[count] < 0) {
              break;
          }
          if (kvm_set_ioeventfd_pio_word(probe_fds[count], 0, count, true) < 0) {
              /* Not registered, so only the descriptor needs releasing */
              close(probe_fds[count]);
              break;
          }
          count++;
      }

      /* Decide whether many devices are supported or not */
      many = count == ARRAY_SIZE(probe_fds);

      /* Undo the registrations in reverse order */
      while (count-- > 0) {
          kvm_set_ioeventfd_pio_word(probe_fds[count], 0, count, false);
          close(probe_fds[count]);
      }

      return many;
  #else
      return 0;
  #endif
  }
  455. static const KVMCapabilityInfo *
  456. kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
  457. {
  458. while (list->name) {
  459. if (!kvm_check_extension(s, list->value)) {
  460. return list;
  461. }
  462. list++;
  463. }
  464. return NULL;
  465. }
  466. static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
  467. {
  468. KVMState *s = kvm_state;
  469. KVMSlot *mem, old;
  470. int err;
  471. MemoryRegion *mr = section->mr;
  472. bool log_dirty = memory_region_is_logging(mr);
  473. hwaddr start_addr = section->offset_within_address_space;
  474. ram_addr_t size = section->size;
  475. void *ram = NULL;
  476. unsigned delta;
  477. /* kvm works in page size chunks, but the function may be called
  478. with sub-page size and unaligned start address. */
  479. delta = TARGET_PAGE_ALIGN(size) - size;
  480. if (delta > size) {
  481. return;
  482. }
  483. start_addr += delta;
  484. size -= delta;
  485. size &= TARGET_PAGE_MASK;
  486. if (!size || (start_addr & ~TARGET_PAGE_MASK)) {
  487. return;
  488. }
  489. if (!memory_region_is_ram(mr)) {
  490. return;
  491. }
  492. ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta;
  493. while (1) {
  494. mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
  495. if (!mem) {
  496. break;
  497. }
  498. if (add && start_addr >= mem->start_addr &&
  499. (start_addr + size <= mem->start_addr + mem->memory_size) &&
  500. (ram - start_addr == mem->ram - mem->start_addr)) {
  501. /* The new slot fits into the existing one and comes with
  502. * identical parameters - update flags and done. */
  503. kvm_slot_dirty_pages_log_change(mem, log_dirty);
  504. return;
  505. }
  506. old = *mem;
  507. if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
  508. kvm_physical_sync_dirty_bitmap(section);
  509. }
  510. /* unregister the overlapping slot */
  511. mem->memory_size = 0;
  512. err = kvm_set_user_memory_region(s, mem);
  513. if (err) {
  514. fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
  515. __func__, strerror(-err));
  516. abort();
  517. }
  518. /* Workaround for older KVM versions: we can't join slots, even not by
  519. * unregistering the previous ones and then registering the larger
  520. * slot. We have to maintain the existing fragmentation. Sigh.
  521. *
  522. * This workaround assumes that the new slot starts at the same
  523. * address as the first existing one. If not or if some overlapping
  524. * slot comes around later, we will fail (not seen in practice so far)
  525. * - and actually require a recent KVM version. */
  526. if (s->broken_set_mem_region &&
  527. old.start_addr == start_addr && old.memory_size < size && add) {
  528. mem = kvm_alloc_slot(s);
  529. mem->memory_size = old.memory_size;
  530. mem->start_addr = old.start_addr;
  531. mem->ram = old.ram;
  532. mem->flags = kvm_mem_flags(s, log_dirty);
  533. err = kvm_set_user_memory_region(s, mem);
  534. if (err) {
  535. fprintf(stderr, "%s: error updating slot: %s\n", __func__,
  536. strerror(-err));
  537. abort();
  538. }
  539. start_addr += old.memory_size;
  540. ram += old.memory_size;
  541. size -= old.memory_size;
  542. continue;
  543. }
  544. /* register prefix slot */
  545. if (old.start_addr < start_addr) {
  546. mem = kvm_alloc_slot(s);
  547. mem->memory_size = start_addr - old.start_addr;
  548. mem->start_addr = old.start_addr;
  549. mem->ram = old.ram;
  550. mem->flags = kvm_mem_flags(s, log_dirty);
  551. err = kvm_set_user_memory_region(s, mem);
  552. if (err) {
  553. fprintf(stderr, "%s: error registering prefix slot: %s\n",
  554. __func__, strerror(-err));
  555. #ifdef TARGET_PPC
  556. fprintf(stderr, "%s: This is probably because your kernel's " \
  557. "PAGE_SIZE is too big. Please try to use 4k " \
  558. "PAGE_SIZE!\n", __func__);
  559. #endif
  560. abort();
  561. }
  562. }
  563. /* register suffix slot */
  564. if (old.start_addr + old.memory_size > start_addr + size) {
  565. ram_addr_t size_delta;
  566. mem = kvm_alloc_slot(s);
  567. mem->start_addr = start_addr + size;
  568. size_delta = mem->start_addr - old.start_addr;
  569. mem->memory_size = old.memory_size - size_delta;
  570. mem->ram = old.ram + size_delta;
  571. mem->flags = kvm_mem_flags(s, log_dirty);
  572. err = kvm_set_user_memory_region(s, mem);
  573. if (err) {
  574. fprintf(stderr, "%s: error registering suffix slot: %s\n",
  575. __func__, strerror(-err));
  576. abort();
  577. }
  578. }
  579. }
  580. /* in case the KVM bug workaround already "consumed" the new slot */
  581. if (!size) {
  582. return;
  583. }
  584. if (!add) {
  585. return;
  586. }
  587. mem = kvm_alloc_slot(s);
  588. mem->memory_size = size;
  589. mem->start_addr = start_addr;
  590. mem->ram = ram;
  591. mem->flags = kvm_mem_flags(s, log_dirty);
  592. err = kvm_set_user_memory_region(s, mem);
  593. if (err) {
  594. fprintf(stderr, "%s: error registering slot: %s\n", __func__,
  595. strerror(-err));
  596. abort();
  597. }
  598. }
  599. static void kvm_region_add(MemoryListener *listener,
  600. MemoryRegionSection *section)
  601. {
  602. kvm_set_phys_mem(section, true);
  603. }
  604. static void kvm_region_del(MemoryListener *listener,
  605. MemoryRegionSection *section)
  606. {
  607. kvm_set_phys_mem(section, false);
  608. }
  609. static void kvm_log_sync(MemoryListener *listener,
  610. MemoryRegionSection *section)
  611. {
  612. int r;
  613. r = kvm_physical_sync_dirty_bitmap(section);
  614. if (r < 0) {
  615. abort();
  616. }
  617. }
  /* MemoryListener hook: turn migration-wide dirty logging on. */
  static void kvm_log_global_start(struct MemoryListener *listener)
  {
      /* Keep the call outside assert() so it survives NDEBUG builds */
      int r = kvm_set_migration_log(1);

      assert(r >= 0);
  }
  /* MemoryListener hook: turn migration-wide dirty logging off. */
  static void kvm_log_global_stop(struct MemoryListener *listener)
  {
      /* Keep the call outside assert() so it survives NDEBUG builds */
      int r = kvm_set_migration_log(0);

      assert(r >= 0);
  }
  630. static void kvm_mem_ioeventfd_add(MemoryListener *listener,
  631. MemoryRegionSection *section,
  632. bool match_data, uint64_t data,
  633. EventNotifier *e)
  634. {
  635. int fd = event_notifier_get_fd(e);
  636. int r;
  637. assert(match_data && section->size <= 8);
  638. r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
  639. data, true, section->size);
  640. if (r < 0) {
  641. abort();
  642. }
  643. }
  644. static void kvm_mem_ioeventfd_del(MemoryListener *listener,
  645. MemoryRegionSection *section,
  646. bool match_data, uint64_t data,
  647. EventNotifier *e)
  648. {
  649. int fd = event_notifier_get_fd(e);
  650. int r;
  651. r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
  652. data, false, section->size);
  653. if (r < 0) {
  654. abort();
  655. }
  656. }
  657. static void kvm_io_ioeventfd_add(MemoryListener *listener,
  658. MemoryRegionSection *section,
  659. bool match_data, uint64_t data,
  660. EventNotifier *e)
  661. {
  662. int fd = event_notifier_get_fd(e);
  663. int r;
  664. assert(match_data && section->size == 2);
  665. r = kvm_set_ioeventfd_pio_word(fd, section->offset_within_address_space,
  666. data, true);
  667. if (r < 0) {
  668. abort();
  669. }
  670. }
  671. static void kvm_io_ioeventfd_del(MemoryListener *listener,
  672. MemoryRegionSection *section,
  673. bool match_data, uint64_t data,
  674. EventNotifier *e)
  675. {
  676. int fd = event_notifier_get_fd(e);
  677. int r;
  678. r = kvm_set_ioeventfd_pio_word(fd, section->offset_within_address_space,
  679. data, false);
  680. if (r < 0) {
  681. abort();
  682. }
  683. }
  684. static MemoryListener kvm_memory_listener = {
  685. .region_add = kvm_region_add,
  686. .region_del = kvm_region_del,
  687. .log_start = kvm_log_start,
  688. .log_stop = kvm_log_stop,
  689. .log_sync = kvm_log_sync,
  690. .log_global_start = kvm_log_global_start,
  691. .log_global_stop = kvm_log_global_stop,
  692. .eventfd_add = kvm_mem_ioeventfd_add,
  693. .eventfd_del = kvm_mem_ioeventfd_del,
  694. .coalesced_mmio_add = kvm_coalesce_mmio_region,
  695. .coalesced_mmio_del = kvm_uncoalesce_mmio_region,
  696. .priority = 10,
  697. };
  698. static MemoryListener kvm_io_listener = {
  699. .eventfd_add = kvm_io_ioeventfd_add,
  700. .eventfd_del = kvm_io_ioeventfd_del,
  701. .priority = 10,
  702. };
  703. static void kvm_handle_interrupt(CPUArchState *env, int mask)
  704. {
  705. CPUState *cpu = ENV_GET_CPU(env);
  706. env->interrupt_request |= mask;
  707. if (!qemu_cpu_is_self(cpu)) {
  708. qemu_cpu_kick(cpu);
  709. }
  710. }
  711. int kvm_set_irq(KVMState *s, int irq, int level)
  712. {
  713. struct kvm_irq_level event;
  714. int ret;
  715. assert(kvm_async_interrupts_enabled());
  716. event.level = level;
  717. event.irq = irq;
  718. ret = kvm_vm_ioctl(s, s->irq_set_ioctl, &event);
  719. if (ret < 0) {
  720. perror("kvm_set_irq");
  721. abort();
  722. }
  723. return (s->irq_set_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
  724. }
  725. #ifdef KVM_CAP_IRQ_ROUTING
  726. typedef struct KVMMSIRoute {
  727. struct kvm_irq_routing_entry kroute;
  728. QTAILQ_ENTRY(KVMMSIRoute) entry;
  729. } KVMMSIRoute;
  730. static void set_gsi(KVMState *s, unsigned int gsi)
  731. {
  732. s->used_gsi_bitmap[gsi / 32] |= 1U << (gsi % 32);
  733. }
  734. static void clear_gsi(KVMState *s, unsigned int gsi)
  735. {
  736. s->used_gsi_bitmap[gsi / 32] &= ~(1U << (gsi % 32));
  737. }
  738. static void kvm_init_irq_routing(KVMState *s)
  739. {
  740. int gsi_count, i;
  741. gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING);
  742. if (gsi_count > 0) {
  743. unsigned int gsi_bits, i;
  744. /* Round up so we can search ints using ffs */
  745. gsi_bits = ALIGN(gsi_count, 32);
  746. s->used_gsi_bitmap = g_malloc0(gsi_bits / 8);
  747. s->gsi_count = gsi_count;
  748. /* Mark any over-allocated bits as already in use */
  749. for (i = gsi_count; i < gsi_bits; i++) {
  750. set_gsi(s, i);
  751. }
  752. }
  753. s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
  754. s->nr_allocated_irq_routes = 0;
  755. if (!s->direct_msi) {
  756. for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) {
  757. QTAILQ_INIT(&s->msi_hashtab[i]);
  758. }
  759. }
  760. kvm_arch_init_irq_routing(s);
  761. }
  762. static void kvm_irqchip_commit_routes(KVMState *s)
  763. {
  764. int ret;
  765. s->irq_routes->flags = 0;
  766. ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
  767. assert(ret == 0);
  768. }
  769. static void kvm_add_routing_entry(KVMState *s,
  770. struct kvm_irq_routing_entry *entry)
  771. {
  772. struct kvm_irq_routing_entry *new;
  773. int n, size;
  774. if (s->irq_routes->nr == s->nr_allocated_irq_routes) {
  775. n = s->nr_allocated_irq_routes * 2;
  776. if (n < 64) {
  777. n = 64;
  778. }
  779. size = sizeof(struct kvm_irq_routing);
  780. size += n * sizeof(*new);
  781. s->irq_routes = g_realloc(s->irq_routes, size);
  782. s->nr_allocated_irq_routes = n;
  783. }
  784. n = s->irq_routes->nr++;
  785. new = &s->irq_routes->entries[n];
  786. memset(new, 0, sizeof(*new));
  787. new->gsi = entry->gsi;
  788. new->type = entry->type;
  789. new->flags = entry->flags;
  790. new->u = entry->u;
  791. set_gsi(s, entry->gsi);
  792. kvm_irqchip_commit_routes(s);
  793. }
  794. static int kvm_update_routing_entry(KVMState *s,
  795. struct kvm_irq_routing_entry *new_entry)
  796. {
  797. struct kvm_irq_routing_entry *entry;
  798. int n;
  799. for (n = 0; n < s->irq_routes->nr; n++) {
  800. entry = &s->irq_routes->entries[n];
  801. if (entry->gsi != new_entry->gsi) {
  802. continue;
  803. }
  804. entry->type = new_entry->type;
  805. entry->flags = new_entry->flags;
  806. entry->u = new_entry->u;
  807. kvm_irqchip_commit_routes(s);
  808. return 0;
  809. }
  810. return -ESRCH;
  811. }
  812. void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin)
  813. {
  814. struct kvm_irq_routing_entry e;
  815. assert(pin < s->gsi_count);
  816. e.gsi = irq;
  817. e.type = KVM_IRQ_ROUTING_IRQCHIP;
  818. e.flags = 0;
  819. e.u.irqchip.irqchip = irqchip;
  820. e.u.irqchip.pin = pin;
  821. kvm_add_routing_entry(s, &e);
  822. }
  823. void kvm_irqchip_release_virq(KVMState *s, int virq)
  824. {
  825. struct kvm_irq_routing_entry *e;
  826. int i;
  827. for (i = 0; i < s->irq_routes->nr; i++) {
  828. e = &s->irq_routes->entries[i];
  829. if (e->gsi == virq) {
  830. s->irq_routes->nr--;
  831. *e = s->irq_routes->entries[s->irq_routes->nr];
  832. }
  833. }
  834. clear_gsi(s, virq);
  835. }
  836. static unsigned int kvm_hash_msi(uint32_t data)
  837. {
  838. /* This is optimized for IA32 MSI layout. However, no other arch shall
  839. * repeat the mistake of not providing a direct MSI injection API. */
  840. return data & 0xff;
  841. }
  842. static void kvm_flush_dynamic_msi_routes(KVMState *s)
  843. {
  844. KVMMSIRoute *route, *next;
  845. unsigned int hash;
  846. for (hash = 0; hash < KVM_MSI_HASHTAB_SIZE; hash++) {
  847. QTAILQ_FOREACH_SAFE(route, &s->msi_hashtab[hash], entry, next) {
  848. kvm_irqchip_release_virq(s, route->kroute.gsi);
  849. QTAILQ_REMOVE(&s->msi_hashtab[hash], route, entry);
  850. g_free(route);
  851. }
  852. }
  853. }
  854. static int kvm_irqchip_get_virq(KVMState *s)
  855. {
  856. uint32_t *word = s->used_gsi_bitmap;
  857. int max_words = ALIGN(s->gsi_count, 32) / 32;
  858. int i, bit;
  859. bool retry = true;
  860. again:
  861. /* Return the lowest unused GSI in the bitmap */
  862. for (i = 0; i < max_words; i++) {
  863. bit = ffs(~word[i]);
  864. if (!bit) {
  865. continue;
  866. }
  867. return bit - 1 + i * 32;
  868. }
  869. if (!s->direct_msi && retry) {
  870. retry = false;
  871. kvm_flush_dynamic_msi_routes(s);
  872. goto again;
  873. }
  874. return -ENOSPC;
  875. }
  876. static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg)
  877. {
  878. unsigned int hash = kvm_hash_msi(msg.data);
  879. KVMMSIRoute *route;
  880. QTAILQ_FOREACH(route, &s->msi_hashtab[hash], entry) {
  881. if (route->kroute.u.msi.address_lo == (uint32_t)msg.address &&
  882. route->kroute.u.msi.address_hi == (msg.address >> 32) &&
  883. route->kroute.u.msi.data == msg.data) {
  884. return route;
  885. }
  886. }
  887. return NULL;
  888. }
  889. int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
  890. {
  891. struct kvm_msi msi;
  892. KVMMSIRoute *route;
  893. if (s->direct_msi) {
  894. msi.address_lo = (uint32_t)msg.address;
  895. msi.address_hi = msg.address >> 32;
  896. msi.data = msg.data;
  897. msi.flags = 0;
  898. memset(msi.pad, 0, sizeof(msi.pad));
  899. return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi);
  900. }
  901. route = kvm_lookup_msi_route(s, msg);
  902. if (!route) {
  903. int virq;
  904. virq = kvm_irqchip_get_virq(s);
  905. if (virq < 0) {
  906. return virq;
  907. }
  908. route = g_malloc(sizeof(KVMMSIRoute));
  909. route->kroute.gsi = virq;
  910. route->kroute.type = KVM_IRQ_ROUTING_MSI;
  911. route->kroute.flags = 0;
  912. route->kroute.u.msi.address_lo = (uint32_t)msg.address;
  913. route->kroute.u.msi.address_hi = msg.address >> 32;
  914. route->kroute.u.msi.data = msg.data;
  915. kvm_add_routing_entry(s, &route->kroute);
  916. QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route,
  917. entry);
  918. }
  919. assert(route->kroute.type == KVM_IRQ_ROUTING_MSI);
  920. return kvm_set_irq(s, route->kroute.gsi, 1);
  921. }
  922. int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
  923. {
  924. struct kvm_irq_routing_entry kroute;
  925. int virq;
  926. if (!kvm_gsi_routing_enabled()) {
  927. return -ENOSYS;
  928. }
  929. virq = kvm_irqchip_get_virq(s);
  930. if (virq < 0) {
  931. return virq;
  932. }
  933. kroute.gsi = virq;
  934. kroute.type = KVM_IRQ_ROUTING_MSI;
  935. kroute.flags = 0;
  936. kroute.u.msi.address_lo = (uint32_t)msg.address;
  937. kroute.u.msi.address_hi = msg.address >> 32;
  938. kroute.u.msi.data = msg.data;
  939. kvm_add_routing_entry(s, &kroute);
  940. return virq;
  941. }
  942. int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
  943. {
  944. struct kvm_irq_routing_entry kroute;
  945. if (!kvm_irqchip_in_kernel()) {
  946. return -ENOSYS;
  947. }
  948. kroute.gsi = virq;
  949. kroute.type = KVM_IRQ_ROUTING_MSI;
  950. kroute.flags = 0;
  951. kroute.u.msi.address_lo = (uint32_t)msg.address;
  952. kroute.u.msi.address_hi = msg.address >> 32;
  953. kroute.u.msi.data = msg.data;
  954. return kvm_update_routing_entry(s, &kroute);
  955. }
  956. static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
  957. {
  958. struct kvm_irqfd irqfd = {
  959. .fd = fd,
  960. .gsi = virq,
  961. .flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
  962. };
  963. if (!kvm_irqfds_enabled()) {
  964. return -ENOSYS;
  965. }
  966. return kvm_vm_ioctl(s, KVM_IRQFD, &irqfd);
  967. }
  968. #else /* !KVM_CAP_IRQ_ROUTING */
/* Build without KVM_CAP_IRQ_ROUTING: there is no routing state to set up. */
static void kvm_init_irq_routing(KVMState *s)
{
}
/* Build without KVM_CAP_IRQ_ROUTING: releasing a virq is a no-op. */
void kvm_irqchip_release_virq(KVMState *s, int virq)
{
}
/*
 * Build without KVM_CAP_IRQ_ROUTING: this variant cannot deliver an MSI
 * and aborts the process if it is ever called.
 */
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
{
    abort();
}
/* Build without KVM_CAP_IRQ_ROUTING: always fails with -ENOSYS. */
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
{
    return -ENOSYS;
}
/*
 * Build without KVM_CAP_IRQ_ROUTING: irqfds cannot be assigned; aborts the
 * process if it is ever called.
 */
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
{
    abort();
}
/* Build without KVM_CAP_IRQ_ROUTING: always fails with -ENOSYS. */
int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
{
    return -ENOSYS;
}
  991. #endif /* !KVM_CAP_IRQ_ROUTING */
  992. int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
  993. {
  994. return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), virq, true);
  995. }
  996. int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
  997. {
  998. return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), virq, false);
  999. }
  1000. static int kvm_irqchip_create(KVMState *s)
  1001. {
  1002. QemuOptsList *list = qemu_find_opts("machine");
  1003. int ret;
  1004. if (QTAILQ_EMPTY(&list->head) ||
  1005. !qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
  1006. "kernel_irqchip", true) ||
  1007. !kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
  1008. return 0;
  1009. }
  1010. ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
  1011. if (ret < 0) {
  1012. fprintf(stderr, "Create kernel irqchip failed\n");
  1013. return ret;
  1014. }
  1015. kvm_kernel_irqchip = true;
  1016. /* If we have an in-kernel IRQ chip then we must have asynchronous
  1017. * interrupt delivery (though the reverse is not necessarily true)
  1018. */
  1019. kvm_async_interrupts_allowed = true;
  1020. kvm_init_irq_routing(s);
  1021. return 0;
  1022. }
  1023. static int kvm_max_vcpus(KVMState *s)
  1024. {
  1025. int ret;
  1026. /* Find number of supported CPUs using the recommended
  1027. * procedure from the kernel API documentation to cope with
  1028. * older kernels that may be missing capabilities.
  1029. */
  1030. ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
  1031. if (ret) {
  1032. return ret;
  1033. }
  1034. ret = kvm_check_extension(s, KVM_CAP_NR_VCPUS);
  1035. if (ret) {
  1036. return ret;
  1037. }
  1038. return 4;
  1039. }
  1040. int kvm_init(void)
  1041. {
  1042. static const char upgrade_note[] =
  1043. "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
  1044. "(see http://sourceforge.net/projects/kvm).\n";
  1045. KVMState *s;
  1046. const KVMCapabilityInfo *missing_cap;
  1047. int ret;
  1048. int i;
  1049. int max_vcpus;
  1050. s = g_malloc0(sizeof(KVMState));
  1051. /*
  1052. * On systems where the kernel can support different base page
  1053. * sizes, host page size may be different from TARGET_PAGE_SIZE,
  1054. * even with KVM. TARGET_PAGE_SIZE is assumed to be the minimum
  1055. * page size for the system though.
  1056. */
  1057. assert(TARGET_PAGE_SIZE <= getpagesize());
  1058. #ifdef KVM_CAP_SET_GUEST_DEBUG
  1059. QTAILQ_INIT(&s->kvm_sw_breakpoints);
  1060. #endif
  1061. for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
  1062. s->slots[i].slot = i;
  1063. }
  1064. s->vmfd = -1;
  1065. s->fd = qemu_open("/dev/kvm", O_RDWR);
  1066. if (s->fd == -1) {
  1067. fprintf(stderr, "Could not access KVM kernel module: %m\n");
  1068. ret = -errno;
  1069. goto err;
  1070. }
  1071. ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
  1072. if (ret < KVM_API_VERSION) {
  1073. if (ret > 0) {
  1074. ret = -EINVAL;
  1075. }
  1076. fprintf(stderr, "kvm version too old\n");
  1077. goto err;
  1078. }
  1079. if (ret > KVM_API_VERSION) {
  1080. ret = -EINVAL;
  1081. fprintf(stderr, "kvm version not supported\n");
  1082. goto err;
  1083. }
  1084. max_vcpus = kvm_max_vcpus(s);
  1085. if (smp_cpus > max_vcpus) {
  1086. ret = -EINVAL;
  1087. fprintf(stderr, "Number of SMP cpus requested (%d) exceeds max cpus "
  1088. "supported by KVM (%d)\n", smp_cpus, max_vcpus);
  1089. goto err;
  1090. }
  1091. s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
  1092. if (s->vmfd < 0) {
  1093. #ifdef TARGET_S390X
  1094. fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
  1095. "your host kernel command line\n");
  1096. #endif
  1097. ret = s->vmfd;
  1098. goto err;
  1099. }
  1100. missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
  1101. if (!missing_cap) {
  1102. missing_cap =
  1103. kvm_check_extension_list(s, kvm_arch_required_capabilities);
  1104. }
  1105. if (missing_cap) {
  1106. ret = -EINVAL;
  1107. fprintf(stderr, "kvm does not support %s\n%s",
  1108. missing_cap->name, upgrade_note);
  1109. goto err;
  1110. }
  1111. s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
  1112. s->broken_set_mem_region = 1;
  1113. ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
  1114. if (ret > 0) {
  1115. s->broken_set_mem_region = 0;
  1116. }
  1117. #ifdef KVM_CAP_VCPU_EVENTS
  1118. s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
  1119. #endif
  1120. s->robust_singlestep =
  1121. kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
  1122. #ifdef KVM_CAP_DEBUGREGS
  1123. s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
  1124. #endif
  1125. #ifdef KVM_CAP_XSAVE
  1126. s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
  1127. #endif
  1128. #ifdef KVM_CAP_XCRS
  1129. s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
  1130. #endif
  1131. #ifdef KVM_CAP_PIT_STATE2
  1132. s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
  1133. #endif
  1134. #ifdef KVM_CAP_IRQ_ROUTING
  1135. s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
  1136. #endif
  1137. s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);
  1138. s->irq_set_ioctl = KVM_IRQ_LINE;
  1139. if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
  1140. s->irq_set_ioctl = KVM_IRQ_LINE_STATUS;
  1141. }
  1142. ret = kvm_arch_init(s);
  1143. if (ret < 0) {
  1144. goto err;
  1145. }
  1146. ret = kvm_irqchip_create(s);
  1147. if (ret < 0) {
  1148. goto err;
  1149. }
  1150. kvm_state = s;
  1151. memory_listener_register(&kvm_memory_listener, &address_space_memory);
  1152. memory_listener_register(&kvm_io_listener, &address_space_io);
  1153. s->many_ioeventfds = kvm_check_many_ioeventfds();
  1154. cpu_interrupt_handler = kvm_handle_interrupt;
  1155. return 0;
  1156. err:
  1157. if (s->vmfd >= 0) {
  1158. close(s->vmfd);
  1159. }
  1160. if (s->fd != -1) {
  1161. close(s->fd);
  1162. }
  1163. g_free(s);
  1164. return ret;
  1165. }
  1166. static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
  1167. uint32_t count)
  1168. {
  1169. int i;
  1170. uint8_t *ptr = data;
  1171. for (i = 0; i < count; i++) {
  1172. if (direction == KVM_EXIT_IO_IN) {
  1173. switch (size) {
  1174. case 1:
  1175. stb_p(ptr, cpu_inb(port));
  1176. break;
  1177. case 2:
  1178. stw_p(ptr, cpu_inw(port));
  1179. break;
  1180. case 4:
  1181. stl_p(ptr, cpu_inl(port));
  1182. break;
  1183. }
  1184. } else {
  1185. switch (size) {
  1186. case 1:
  1187. cpu_outb(port, ldub_p(ptr));
  1188. break;
  1189. case 2:
  1190. cpu_outw(port, lduw_p(ptr));
  1191. break;
  1192. case 4:
  1193. cpu_outl(port, ldl_p(ptr));
  1194. break;
  1195. }
  1196. }
  1197. ptr += size;
  1198. }
  1199. }
  1200. static int kvm_handle_internal_error(CPUArchState *env, struct kvm_run *run)
  1201. {
  1202. CPUState *cpu = ENV_GET_CPU(env);
  1203. fprintf(stderr, "KVM internal error.");
  1204. if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
  1205. int i;
  1206. fprintf(stderr, " Suberror: %d\n", run->internal.suberror);
  1207. for (i = 0; i < run->internal.ndata; ++i) {
  1208. fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
  1209. i, (uint64_t)run->internal.data[i]);
  1210. }
  1211. } else {
  1212. fprintf(stderr, "\n");
  1213. }
  1214. if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
  1215. fprintf(stderr, "emulation failure\n");
  1216. if (!kvm_arch_stop_on_emulation_error(cpu)) {
  1217. cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
  1218. return EXCP_INTERRUPT;
  1219. }
  1220. }
  1221. /* FIXME: Should trigger a qmp message to let management know
  1222. * something went wrong.
  1223. */
  1224. return -1;
  1225. }
  1226. void kvm_flush_coalesced_mmio_buffer(void)
  1227. {
  1228. KVMState *s = kvm_state;
  1229. if (s->coalesced_flush_in_progress) {
  1230. return;
  1231. }
  1232. s->coalesced_flush_in_progress = true;
  1233. if (s->coalesced_mmio_ring) {
  1234. struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
  1235. while (ring->first != ring->last) {
  1236. struct kvm_coalesced_mmio *ent;
  1237. ent = &ring->coalesced_mmio[ring->first];
  1238. cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
  1239. smp_wmb();
  1240. ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
  1241. }
  1242. }
  1243. s->coalesced_flush_in_progress = false;
  1244. }
  1245. static void do_kvm_cpu_synchronize_state(void *arg)
  1246. {
  1247. CPUState *cpu = arg;
  1248. if (!cpu->kvm_vcpu_dirty) {
  1249. kvm_arch_get_registers(cpu);
  1250. cpu->kvm_vcpu_dirty = true;
  1251. }
  1252. }
  1253. void kvm_cpu_synchronize_state(CPUArchState *env)
  1254. {
  1255. CPUState *cpu = ENV_GET_CPU(env);
  1256. if (!cpu->kvm_vcpu_dirty) {
  1257. run_on_cpu(cpu, do_kvm_cpu_synchronize_state, cpu);
  1258. }
  1259. }
  1260. void kvm_cpu_synchronize_post_reset(CPUArchState *env)
  1261. {
  1262. CPUState *cpu = ENV_GET_CPU(env);
  1263. kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
  1264. cpu->kvm_vcpu_dirty = false;
  1265. }
  1266. void kvm_cpu_synchronize_post_init(CPUArchState *env)
  1267. {
  1268. CPUState *cpu = ENV_GET_CPU(env);
  1269. kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
  1270. cpu->kvm_vcpu_dirty = false;
  1271. }
/*
 * Run the vCPU behind @env with KVM_RUN until an exit needs attention
 * outside this loop.
 *
 * If kvm_arch_process_async_events() reports non-zero the function clears
 * env->exit_request and returns EXCP_HLT without entering the guest.
 * Otherwise it loops: dirty registers are written back, KVM_RUN is issued
 * with the iothread mutex released, and the exit reason is dispatched.
 * The loop continues for as long as the handler for the exit yields 0.
 *
 * Returns the non-zero value that ended the loop.  When that value is
 * negative the CPU state is dumped and the VM is stopped with
 * RUN_STATE_INTERNAL_ERROR first.  A KVM_RUN failure other than
 * -EINTR / -EAGAIN aborts the process.
 */
int kvm_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    struct kvm_run *run = cpu->kvm_run;
    int ret, run_ret;

    DPRINTF("kvm_cpu_exec()\n");

    if (kvm_arch_process_async_events(cpu)) {
        env->exit_request = 0;
        return EXCP_HLT;
    }

    do {
        /* Push register state to the kernel only if it was modified. */
        if (cpu->kvm_vcpu_dirty) {
            kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
            cpu->kvm_vcpu_dirty = false;
        }

        kvm_arch_pre_run(cpu, run);
        if (env->exit_request) {
            DPRINTF("interrupt exit requested\n");
            /*
             * KVM requires us to reenter the kernel after IO exits to complete
             * instruction emulation. This self-signal will ensure that we
             * leave ASAP again.
             */
            qemu_cpu_kick_self();
        }
        /* The iothread mutex is dropped for the duration of KVM_RUN. */
        qemu_mutex_unlock_iothread();

        run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);

        qemu_mutex_lock_iothread();
        kvm_arch_post_run(cpu, run);

        if (run_ret < 0) {
            /* Interrupted or asked to retry: leave the run loop. */
            if (run_ret == -EINTR || run_ret == -EAGAIN) {
                DPRINTF("io window exit\n");
                ret = EXCP_INTERRUPT;
                break;
            }
            fprintf(stderr, "error: kvm run failed %s\n",
                    strerror(-run_ret));
            abort();
        }

        /* Handlers set ret to 0 to re-enter the guest, non-zero to stop. */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            DPRINTF("handle_io\n");
            kvm_handle_io(run->io.port,
                          (uint8_t *)run + run->io.data_offset,
                          run->io.direction,
                          run->io.size,
                          run->io.count);
            ret = 0;
            break;
        case KVM_EXIT_MMIO:
            DPRINTF("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 0;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            DPRINTF("irq_window_open\n");
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_SHUTDOWN:
            DPRINTF("shutdown\n");
            qemu_system_reset_request();
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_UNKNOWN:
            fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
                    (uint64_t)run->hw.hardware_exit_reason);
            ret = -1;
            break;
        case KVM_EXIT_INTERNAL_ERROR:
            ret = kvm_handle_internal_error(env, run);
            break;
        default:
            /* Everything else is left to the architecture code. */
            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(cpu, run);
            break;
        }
    } while (ret == 0);

    /* A negative result is treated as an internal error: stop the VM. */
    if (ret < 0) {
        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
        vm_stop(RUN_STATE_INTERNAL_ERROR);
    }

    env->exit_request = 0;
    return ret;
}
  1359. int kvm_ioctl(KVMState *s, int type, ...)
  1360. {
  1361. int ret;
  1362. void *arg;
  1363. va_list ap;
  1364. va_start(ap, type);
  1365. arg = va_arg(ap, void *);
  1366. va_end(ap);
  1367. ret = ioctl(s->fd, type, arg);
  1368. if (ret == -1) {
  1369. ret = -errno;
  1370. }
  1371. return ret;
  1372. }
  1373. int kvm_vm_ioctl(KVMState *s, int type, ...)
  1374. {
  1375. int ret;
  1376. void *arg;
  1377. va_list ap;
  1378. va_start(ap, type);
  1379. arg = va_arg(ap, void *);
  1380. va_end(ap);
  1381. ret = ioctl(s->vmfd, type, arg);
  1382. if (ret == -1) {
  1383. ret = -errno;
  1384. }
  1385. return ret;
  1386. }
  1387. int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
  1388. {
  1389. int ret;
  1390. void *arg;
  1391. va_list ap;
  1392. va_start(ap, type);
  1393. arg = va_arg(ap, void *);
  1394. va_end(ap);
  1395. ret = ioctl(cpu->kvm_fd, type, arg);
  1396. if (ret == -1) {
  1397. ret = -errno;
  1398. }
  1399. return ret;
  1400. }
  1401. int kvm_has_sync_mmu(void)
  1402. {
  1403. return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
  1404. }
  1405. int kvm_has_vcpu_events(void)
  1406. {
  1407. return kvm_state->vcpu_events;
  1408. }
  1409. int kvm_has_robust_singlestep(void)
  1410. {
  1411. return kvm_state->robust_singlestep;
  1412. }
  1413. int kvm_has_debugregs(void)
  1414. {
  1415. return kvm_state->debugregs;
  1416. }
  1417. int kvm_has_xsave(void)
  1418. {
  1419. return kvm_state->xsave;
  1420. }
  1421. int kvm_has_xcrs(void)
  1422. {
  1423. return kvm_state->xcrs;
  1424. }
  1425. int kvm_has_pit_state2(void)
  1426. {
  1427. return kvm_state->pit_state2;
  1428. }
  1429. int kvm_has_many_ioeventfds(void)
  1430. {
  1431. if (!kvm_enabled()) {
  1432. return 0;
  1433. }
  1434. return kvm_state->many_ioeventfds;
  1435. }
  1436. int kvm_has_gsi_routing(void)
  1437. {
  1438. #ifdef KVM_CAP_IRQ_ROUTING
  1439. return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
  1440. #else
  1441. return false;
  1442. #endif
  1443. }
  1444. int kvm_has_intx_set_mask(void)
  1445. {
  1446. return kvm_state->intx_set_mask;
  1447. }
  1448. void *kvm_vmalloc(ram_addr_t size)
  1449. {
  1450. #ifdef TARGET_S390X
  1451. void *mem;
  1452. mem = kvm_arch_vmalloc(size);
  1453. if (mem) {
  1454. return mem;
  1455. }
  1456. #endif
  1457. return qemu_vmalloc(size);
  1458. }
  1459. void kvm_setup_guest_memory(void *start, size_t size)
  1460. {
  1461. #ifdef CONFIG_VALGRIND_H
  1462. VALGRIND_MAKE_MEM_DEFINED(start, size);
  1463. #endif
  1464. if (!kvm_has_sync_mmu()) {
  1465. int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);
  1466. if (ret) {
  1467. perror("qemu_madvise");
  1468. fprintf(stderr,
  1469. "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
  1470. exit(1);
  1471. }
  1472. }
  1473. }
  1474. #ifdef KVM_CAP_SET_GUEST_DEBUG
  1475. struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
  1476. target_ulong pc)
  1477. {
  1478. struct kvm_sw_breakpoint *bp;
  1479. QTAILQ_FOREACH(bp, &cpu->kvm_state->kvm_sw_breakpoints, entry) {
  1480. if (bp->pc == pc) {
  1481. return bp;
  1482. }
  1483. }
  1484. return NULL;
  1485. }
  1486. int kvm_sw_breakpoints_active(CPUState *cpu)
  1487. {
  1488. return !QTAILQ_EMPTY(&cpu->kvm_state->kvm_sw_breakpoints);
  1489. }
/*
 * Argument bundle handed to kvm_invoke_set_guest_debug() through
 * run_on_cpu(): the request, the CPU to apply it to, and the result.
 */
struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;  /* payload for KVM_SET_GUEST_DEBUG */
    CPUState *cpu;               /* vCPU the ioctl is issued on */
    int err;                     /* ioctl result, written by the callback */
};
  1495. static void kvm_invoke_set_guest_debug(void *data)
  1496. {
  1497. struct kvm_set_guest_debug_data *dbg_data = data;
  1498. dbg_data->err = kvm_vcpu_ioctl(dbg_data->cpu, KVM_SET_GUEST_DEBUG,
  1499. &dbg_data->dbg);
  1500. }
  1501. int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
  1502. {
  1503. CPUState *cpu = ENV_GET_CPU(env);
  1504. struct kvm_set_guest_debug_data data;
  1505. data.dbg.control = reinject_trap;
  1506. if (env->singlestep_enabled) {
  1507. data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
  1508. }
  1509. kvm_arch_update_guest_debug(cpu, &data.dbg);
  1510. data.cpu = cpu;
  1511. run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data);
  1512. return data.err;
  1513. }
  1514. int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
  1515. target_ulong len, int type)
  1516. {
  1517. CPUState *current_cpu = ENV_GET_CPU(current_env);
  1518. struct kvm_sw_breakpoint *bp;
  1519. CPUArchState *env;
  1520. int err;
  1521. if (type == GDB_BREAKPOINT_SW) {
  1522. bp = kvm_find_sw_breakpoint(current_cpu, addr);
  1523. if (bp) {
  1524. bp->use_count++;
  1525. return 0;
  1526. }
  1527. bp = g_malloc(sizeof(struct kvm_sw_breakpoint));
  1528. if (!bp) {
  1529. return -ENOMEM;
  1530. }
  1531. bp->pc = addr;
  1532. bp->use_count = 1;
  1533. err = kvm_arch_insert_sw_breakpoint(current_cpu, bp);
  1534. if (err) {
  1535. g_free(bp);
  1536. return err;
  1537. }
  1538. QTAILQ_INSERT_HEAD(&current_cpu->kvm_state->kvm_sw_breakpoints,
  1539. bp, entry);
  1540. } else {
  1541. err = kvm_arch_insert_hw_breakpoint(addr, len, type);
  1542. if (err) {
  1543. return err;
  1544. }
  1545. }
  1546. for (env = first_cpu; env != NULL; env = env->next_cpu) {
  1547. err = kvm_update_guest_debug(env, 0);
  1548. if (err) {
  1549. return err;
  1550. }
  1551. }
  1552. return 0;
  1553. }
  1554. int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
  1555. target_ulong len, int type)
  1556. {
  1557. CPUState *current_cpu = ENV_GET_CPU(current_env);
  1558. struct kvm_sw_breakpoint *bp;
  1559. CPUArchState *env;
  1560. int err;
  1561. if (type == GDB_BREAKPOINT_SW) {
  1562. bp = kvm_find_sw_breakpoint(current_cpu, addr);
  1563. if (!bp) {
  1564. return -ENOENT;
  1565. }
  1566. if (bp->use_count > 1) {
  1567. bp->use_count--;
  1568. return 0;
  1569. }
  1570. err = kvm_arch_remove_sw_breakpoint(current_cpu, bp);
  1571. if (err) {
  1572. return err;
  1573. }
  1574. QTAILQ_REMOVE(&current_cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
  1575. g_free(bp);
  1576. } else {
  1577. err = kvm_arch_remove_hw_breakpoint(addr, len, type);
  1578. if (err) {
  1579. return err;
  1580. }
  1581. }
  1582. for (env = first_cpu; env != NULL; env = env->next_cpu) {
  1583. err = kvm_update_guest_debug(env, 0);
  1584. if (err) {
  1585. return err;
  1586. }
  1587. }
  1588. return 0;
  1589. }
  1590. void kvm_remove_all_breakpoints(CPUArchState *current_env)
  1591. {
  1592. CPUState *current_cpu = ENV_GET_CPU(current_env);
  1593. struct kvm_sw_breakpoint *bp, *next;
  1594. KVMState *s = current_cpu->kvm_state;
  1595. CPUArchState *env;
  1596. CPUState *cpu;
  1597. QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
  1598. if (kvm_arch_remove_sw_breakpoint(current_cpu, bp) != 0) {
  1599. /* Try harder to find a CPU that currently sees the breakpoint. */
  1600. for (env = first_cpu; env != NULL; env = env->next_cpu) {
  1601. cpu = ENV_GET_CPU(env);
  1602. if (kvm_arch_remove_sw_breakpoint(cpu, bp) == 0) {
  1603. break;
  1604. }
  1605. }
  1606. }
  1607. QTAILQ_REMOVE(&s->kvm_sw_breakpoints, bp, entry);
  1608. g_free(bp);
  1609. }
  1610. kvm_arch_remove_all_hw_breakpoints();
  1611. for (env = first_cpu; env != NULL; env = env->next_cpu) {
  1612. kvm_update_guest_debug(env, 0);
  1613. }
  1614. }
  1615. #else /* !KVM_CAP_SET_GUEST_DEBUG */
/* Build without KVM_CAP_SET_GUEST_DEBUG: always fails with -EINVAL. */
int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}
/* Build without KVM_CAP_SET_GUEST_DEBUG: always fails with -EINVAL. */
int kvm_insert_breakpoint(CPUArchState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}
/* Build without KVM_CAP_SET_GUEST_DEBUG: always fails with -EINVAL. */
int kvm_remove_breakpoint(CPUArchState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}
/* Build without KVM_CAP_SET_GUEST_DEBUG: there is nothing to remove. */
void kvm_remove_all_breakpoints(CPUArchState *current_env)
{
}
  1633. #endif /* !KVM_CAP_SET_GUEST_DEBUG */
  1634. int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset)
  1635. {
  1636. CPUState *cpu = ENV_GET_CPU(env);
  1637. struct kvm_signal_mask *sigmask;
  1638. int r;
  1639. if (!sigset) {
  1640. return kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, NULL);
  1641. }
  1642. sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));
  1643. sigmask->len = 8;
  1644. memcpy(sigmask->sigset, sigset, sizeof(*sigset));
  1645. r = kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, sigmask);
  1646. g_free(sigmask);
  1647. return r;
  1648. }
  1649. int kvm_set_ioeventfd_mmio(int fd, uint32_t addr, uint32_t val, bool assign,
  1650. uint32_t size)
  1651. {
  1652. int ret;
  1653. struct kvm_ioeventfd iofd;
  1654. iofd.datamatch = val;
  1655. iofd.addr = addr;
  1656. iofd.len = size;
  1657. iofd.flags = KVM_IOEVENTFD_FLAG_DATAMATCH;
  1658. iofd.fd = fd;
  1659. if (!kvm_enabled()) {
  1660. return -ENOSYS;
  1661. }
  1662. if (!assign) {
  1663. iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
  1664. }
  1665. ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);
  1666. if (ret < 0) {
  1667. return -errno;
  1668. }
  1669. return 0;
  1670. }
  1671. int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
  1672. {
  1673. struct kvm_ioeventfd kick = {
  1674. .datamatch = val,
  1675. .addr = addr,
  1676. .len = 2,
  1677. .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
  1678. .fd = fd,
  1679. };
  1680. int r;
  1681. if (!kvm_enabled()) {
  1682. return -ENOSYS;
  1683. }
  1684. if (!assign) {
  1685. kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
  1686. }
  1687. r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
  1688. if (r < 0) {
  1689. return r;
  1690. }
  1691. return 0;
  1692. }
  1693. int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
  1694. {
  1695. return kvm_arch_on_sigbus_vcpu(cpu, code, addr);
  1696. }
  1697. int kvm_on_sigbus(int code, void *addr)
  1698. {
  1699. return kvm_arch_on_sigbus(code, addr);
  1700. }