kvm-all.c

  1. /*
  2. * QEMU KVM support
  3. *
  4. * Copyright IBM, Corp. 2008
  5. * Red Hat, Inc. 2008
  6. *
  7. * Authors:
  8. * Anthony Liguori <aliguori@us.ibm.com>
  9. * Glauber Costa <gcosta@redhat.com>
  10. *
  11. * This work is licensed under the terms of the GNU GPL, version 2 or later.
  12. * See the COPYING file in the top-level directory.
  13. *
  14. */
  15. #include "qemu/osdep.h"
  16. #include <sys/ioctl.h>
  17. #include <linux/kvm.h>
  18. #include "qemu-common.h"
  19. #include "qemu/atomic.h"
  20. #include "qemu/option.h"
  21. #include "qemu/config-file.h"
  22. #include "qemu/error-report.h"
  23. #include "hw/hw.h"
  24. #include "hw/pci/msi.h"
  25. #include "hw/pci/msix.h"
  26. #include "hw/s390x/adapter.h"
  27. #include "exec/gdbstub.h"
  28. #include "sysemu/kvm_int.h"
  29. #include "sysemu/cpus.h"
  30. #include "qemu/bswap.h"
  31. #include "exec/memory.h"
  32. #include "exec/ram_addr.h"
  33. #include "exec/address-spaces.h"
  34. #include "qemu/event_notifier.h"
  35. #include "trace-root.h"
  36. #include "hw/irq.h"
  37. #include "hw/boards.h"
  38. /* This check must be after config-host.h is included */
  39. #ifdef CONFIG_EVENTFD
  40. #include <sys/eventfd.h>
  41. #endif
  42. /* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
  43. * need to use the real host PAGE_SIZE, as that's what KVM will use.
  44. */
  45. #define PAGE_SIZE getpagesize()
  46. //#define DEBUG_KVM
  47. #ifdef DEBUG_KVM
  48. #define DPRINTF(fmt, ...) \
  49. do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  50. #else
  51. #define DPRINTF(fmt, ...) \
  52. do { } while (0)
  53. #endif
  54. #define KVM_MSI_HASHTAB_SIZE 256
  55. struct KVMParkedVcpu {
  56. unsigned long vcpu_id;
  57. int kvm_fd;
  58. QLIST_ENTRY(KVMParkedVcpu) node;
  59. };
  60. struct KVMState
  61. {
  62. AccelState parent_obj;
  63. int nr_slots;
  64. int fd;
  65. int vmfd;
  66. int coalesced_mmio;
  67. struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
  68. bool coalesced_flush_in_progress;
  69. int broken_set_mem_region;
  70. int vcpu_events;
  71. int robust_singlestep;
  72. int debugregs;
  73. #ifdef KVM_CAP_SET_GUEST_DEBUG
  74. struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
  75. #endif
  76. int many_ioeventfds;
  77. int intx_set_mask;
  78. /* The man page (and posix) say ioctl numbers are signed int, but
  79. * they're not. Linux, glibc and *BSD all treat ioctl numbers as
  80. * unsigned, and treating them as signed here can break things */
  81. unsigned irq_set_ioctl;
  82. unsigned int sigmask_len;
  83. GHashTable *gsimap;
  84. #ifdef KVM_CAP_IRQ_ROUTING
  85. struct kvm_irq_routing *irq_routes;
  86. int nr_allocated_irq_routes;
  87. unsigned long *used_gsi_bitmap;
  88. unsigned int gsi_count;
  89. QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
  90. #endif
  91. KVMMemoryListener memory_listener;
  92. QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus;
  93. };
  94. KVMState *kvm_state;
  95. bool kvm_kernel_irqchip;
  96. bool kvm_split_irqchip;
  97. bool kvm_async_interrupts_allowed;
  98. bool kvm_halt_in_kernel_allowed;
  99. bool kvm_eventfds_allowed;
  100. bool kvm_irqfds_allowed;
  101. bool kvm_resamplefds_allowed;
  102. bool kvm_msi_via_irqfd_allowed;
  103. bool kvm_gsi_routing_allowed;
  104. bool kvm_gsi_direct_mapping;
  105. bool kvm_allowed;
  106. bool kvm_readonly_mem_allowed;
  107. bool kvm_vm_attributes_allowed;
  108. bool kvm_direct_msi_allowed;
  109. bool kvm_ioeventfd_any_length_allowed;
  110. bool kvm_msi_use_devid;
  111. static bool kvm_immediate_exit;
  112. static const KVMCapabilityInfo kvm_required_capabilites[] = {
  113. KVM_CAP_INFO(USER_MEMORY),
  114. KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
  115. KVM_CAP_LAST_INFO
  116. };
  117. int kvm_get_max_memslots(void)
  118. {
  119. KVMState *s = KVM_STATE(current_machine->accelerator);
  120. return s->nr_slots;
  121. }
  122. static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
  123. {
  124. KVMState *s = kvm_state;
  125. int i;
  126. for (i = 0; i < s->nr_slots; i++) {
  127. if (kml->slots[i].memory_size == 0) {
  128. return &kml->slots[i];
  129. }
  130. }
  131. return NULL;
  132. }
  133. bool kvm_has_free_slot(MachineState *ms)
  134. {
  135. KVMState *s = KVM_STATE(ms->accelerator);
  136. return kvm_get_free_slot(&s->memory_listener);
  137. }
  138. static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
  139. {
  140. KVMSlot *slot = kvm_get_free_slot(kml);
  141. if (slot) {
  142. return slot;
  143. }
  144. fprintf(stderr, "%s: no free slot available\n", __func__);
  145. abort();
  146. }
  147. static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml,
  148. hwaddr start_addr,
  149. hwaddr end_addr)
  150. {
  151. KVMState *s = kvm_state;
  152. int i;
  153. for (i = 0; i < s->nr_slots; i++) {
  154. KVMSlot *mem = &kml->slots[i];
  155. if (start_addr == mem->start_addr &&
  156. end_addr == mem->start_addr + mem->memory_size) {
  157. return mem;
  158. }
  159. }
  160. return NULL;
  161. }
  162. /*
  163. * Find overlapping slot with lowest start address
  164. */
  165. static KVMSlot *kvm_lookup_overlapping_slot(KVMMemoryListener *kml,
  166. hwaddr start_addr,
  167. hwaddr end_addr)
  168. {
  169. KVMState *s = kvm_state;
  170. KVMSlot *found = NULL;
  171. int i;
  172. for (i = 0; i < s->nr_slots; i++) {
  173. KVMSlot *mem = &kml->slots[i];
  174. if (mem->memory_size == 0 ||
  175. (found && found->start_addr < mem->start_addr)) {
  176. continue;
  177. }
  178. if (end_addr > mem->start_addr &&
  179. start_addr < mem->start_addr + mem->memory_size) {
  180. found = mem;
  181. }
  182. }
  183. return found;
  184. }
  185. int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
  186. hwaddr *phys_addr)
  187. {
  188. KVMMemoryListener *kml = &s->memory_listener;
  189. int i;
  190. for (i = 0; i < s->nr_slots; i++) {
  191. KVMSlot *mem = &kml->slots[i];
  192. if (ram >= mem->ram && ram < mem->ram + mem->memory_size) {
  193. *phys_addr = mem->start_addr + (ram - mem->ram);
  194. return 1;
  195. }
  196. }
  197. return 0;
  198. }
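/* Push a single slot to the kernel with KVM_SET_USER_MEMORY_REGION; the
 * address-space id is encoded in the upper 16 bits of the slot number.
 * Read-only slots are first cleared (size 0) and then re-registered with
 * their final size, as required by the KVM commit referenced below. */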
  199. static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot)
  200. {
  201. KVMState *s = kvm_state;
  202. struct kvm_userspace_memory_region mem;
  203. mem.slot = slot->slot | (kml->as_id << 16);
  204. mem.guest_phys_addr = slot->start_addr;
  205. mem.userspace_addr = (unsigned long)slot->ram;
  206. mem.flags = slot->flags;
  207. if (slot->memory_size && mem.flags & KVM_MEM_READONLY) {
  208. /* Set the slot size to 0 before setting the slot to the desired
  209. * value. This is needed based on KVM commit 75d61fbc. */
  210. mem.memory_size = 0;
  211. kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
  212. }
  213. mem.memory_size = slot->memory_size;
  214. return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
  215. }
  216. int kvm_destroy_vcpu(CPUState *cpu)
  217. {
  218. KVMState *s = kvm_state;
  219. long mmap_size;
  220. struct KVMParkedVcpu *vcpu = NULL;
  221. int ret = 0;
  222. DPRINTF("kvm_destroy_vcpu\n");
  223. mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
  224. if (mmap_size < 0) {
  225. ret = mmap_size;
  226. DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
  227. goto err;
  228. }
  229. ret = munmap(cpu->kvm_run, mmap_size);
  230. if (ret < 0) {
  231. goto err;
  232. }
  233. vcpu = g_malloc0(sizeof(*vcpu));
  234. vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
  235. vcpu->kvm_fd = cpu->kvm_fd;
  236. QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
  237. err:
  238. return ret;
  239. }
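/* Return a vCPU file descriptor for vcpu_id, reusing a fd that
 * kvm_destroy_vcpu() parked on the kvm_parked_vcpus list if one exists,
 * and falling back to KVM_CREATE_VCPU otherwise. */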
  240. static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
  241. {
  242. struct KVMParkedVcpu *cpu;
  243. QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
  244. if (cpu->vcpu_id == vcpu_id) {
  245. int kvm_fd;
  246. QLIST_REMOVE(cpu, node);
  247. kvm_fd = cpu->kvm_fd;
  248. g_free(cpu);
  249. return kvm_fd;
  250. }
  251. }
  252. return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
  253. }
  254. int kvm_init_vcpu(CPUState *cpu)
  255. {
  256. KVMState *s = kvm_state;
  257. long mmap_size;
  258. int ret;
  259. DPRINTF("kvm_init_vcpu\n");
  260. ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
  261. if (ret < 0) {
  262. DPRINTF("kvm_create_vcpu failed\n");
  263. goto err;
  264. }
  265. cpu->kvm_fd = ret;
  266. cpu->kvm_state = s;
  267. cpu->kvm_vcpu_dirty = true;
  268. mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
  269. if (mmap_size < 0) {
  270. ret = mmap_size;
  271. DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
  272. goto err;
  273. }
  274. cpu->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
  275. cpu->kvm_fd, 0);
  276. if (cpu->kvm_run == MAP_FAILED) {
  277. ret = -errno;
  278. DPRINTF("mmap'ing vcpu state failed\n");
  279. goto err;
  280. }
  281. if (s->coalesced_mmio && !s->coalesced_mmio_ring) {
  282. s->coalesced_mmio_ring =
  283. (void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
  284. }
  285. ret = kvm_arch_init_vcpu(cpu);
  286. err:
  287. return ret;
  288. }
  289. /*
  290. * dirty pages logging control
  291. */
  292. static int kvm_mem_flags(MemoryRegion *mr)
  293. {
  294. bool readonly = mr->readonly || memory_region_is_romd(mr);
  295. int flags = 0;
  296. if (memory_region_get_dirty_log_mask(mr) != 0) {
  297. flags |= KVM_MEM_LOG_DIRTY_PAGES;
  298. }
  299. if (readonly && kvm_readonly_mem_allowed) {
  300. flags |= KVM_MEM_READONLY;
  301. }
  302. return flags;
  303. }
  304. static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem,
  305. MemoryRegion *mr)
  306. {
  307. int old_flags;
  308. old_flags = mem->flags;
  309. mem->flags = kvm_mem_flags(mr);
  310. /* If nothing changed effectively, no need to issue ioctl */
  311. if (mem->flags == old_flags) {
  312. return 0;
  313. }
  314. return kvm_set_user_memory_region(kml, mem);
  315. }
  316. static int kvm_section_update_flags(KVMMemoryListener *kml,
  317. MemoryRegionSection *section)
  318. {
  319. hwaddr phys_addr = section->offset_within_address_space;
  320. ram_addr_t size = int128_get64(section->size);
  321. KVMSlot *mem = kvm_lookup_matching_slot(kml, phys_addr, phys_addr + size);
  322. if (mem == NULL) {
  323. return 0;
  324. } else {
  325. return kvm_slot_update_flags(kml, mem, section->mr);
  326. }
  327. }
  328. static void kvm_log_start(MemoryListener *listener,
  329. MemoryRegionSection *section,
  330. int old, int new)
  331. {
  332. KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
  333. int r;
  334. if (old != 0) {
  335. return;
  336. }
  337. r = kvm_section_update_flags(kml, section);
  338. if (r < 0) {
  339. abort();
  340. }
  341. }
  342. static void kvm_log_stop(MemoryListener *listener,
  343. MemoryRegionSection *section,
  344. int old, int new)
  345. {
  346. KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
  347. int r;
  348. if (new != 0) {
  349. return;
  350. }
  351. r = kvm_section_update_flags(kml, section);
  352. if (r < 0) {
  353. abort();
  354. }
  355. }
  356. /* get kvm's dirty pages bitmap and update qemu's */
  357. static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
  358. unsigned long *bitmap)
  359. {
  360. ram_addr_t start = section->offset_within_region +
  361. memory_region_get_ram_addr(section->mr);
  362. ram_addr_t pages = int128_get64(section->size) / getpagesize();
  363. cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages);
  364. return 0;
  365. }
  366. #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1))
  367. /**
  368. * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
  369. * This function updates QEMU's dirty bitmap using
  370. * cpu_physical_memory_set_dirty_lebitmap(), i.e. every page the kernel
  371. * reports as dirty is marked dirty on the QEMU side.
  372. *
  373. * @kml: the KVM memory listener of the affected address space.
  374. * @section: the memory region section whose dirty log is synced.
  375. */
  376. static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml,
  377. MemoryRegionSection *section)
  378. {
  379. KVMState *s = kvm_state;
  380. unsigned long size, allocated_size = 0;
  381. struct kvm_dirty_log d = {};
  382. KVMSlot *mem;
  383. int ret = 0;
  384. hwaddr start_addr = section->offset_within_address_space;
  385. hwaddr end_addr = start_addr + int128_get64(section->size);
  386. d.dirty_bitmap = NULL;
  387. while (start_addr < end_addr) {
  388. mem = kvm_lookup_overlapping_slot(kml, start_addr, end_addr);
  389. if (mem == NULL) {
  390. break;
  391. }
  392. /* XXX bad kernel interface alert
  393. * For dirty bitmap, kernel allocates array of size aligned to
  394. * bits-per-long. But for case when the kernel is 64bits and
  395. * the userspace is 32bits, userspace can't align to the same
  396. * bits-per-long, since sizeof(long) is different between kernel
  397. * and user space. This way, userspace will provide buffer which
  398. * may be 4 bytes less than the kernel will use, resulting in
  399. * userspace memory corruption (which is not detectable by valgrind
  400. * too, in most cases).
  401. * So for now, let's align to 64 instead of HOST_LONG_BITS here, in
  402. * a hope that sizeof(long) won't become >8 any time soon.
  403. */
  404. size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
  405. /*HOST_LONG_BITS*/ 64) / 8;
  406. if (!d.dirty_bitmap) {
  407. d.dirty_bitmap = g_malloc(size);
  408. } else if (size > allocated_size) {
  409. d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
  410. }
  411. allocated_size = size;
  412. memset(d.dirty_bitmap, 0, allocated_size);
  413. d.slot = mem->slot | (kml->as_id << 16);
  414. if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
  415. DPRINTF("ioctl failed %d\n", errno);
  416. ret = -1;
  417. break;
  418. }
  419. kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
  420. start_addr = mem->start_addr + mem->memory_size;
  421. }
  422. g_free(d.dirty_bitmap);
  423. return ret;
  424. }
  425. static void kvm_coalesce_mmio_region(MemoryListener *listener,
  426. MemoryRegionSection *section,
  427. hwaddr start, hwaddr size)
  428. {
  429. KVMState *s = kvm_state;
  430. if (s->coalesced_mmio) {
  431. struct kvm_coalesced_mmio_zone zone;
  432. zone.addr = start;
  433. zone.size = size;
  434. zone.pad = 0;
  435. (void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
  436. }
  437. }
  438. static void kvm_uncoalesce_mmio_region(MemoryListener *listener,
  439. MemoryRegionSection *section,
  440. hwaddr start, hwaddr size)
  441. {
  442. KVMState *s = kvm_state;
  443. if (s->coalesced_mmio) {
  444. struct kvm_coalesced_mmio_zone zone;
  445. zone.addr = start;
  446. zone.size = size;
  447. zone.pad = 0;
  448. (void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
  449. }
  450. }
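/* Query a KVM capability on the /dev/kvm fd; errors are reported as
 * "extension not available" (0). */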
  451. int kvm_check_extension(KVMState *s, unsigned int extension)
  452. {
  453. int ret;
  454. ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
  455. if (ret < 0) {
  456. ret = 0;
  457. }
  458. return ret;
  459. }
  460. int kvm_vm_check_extension(KVMState *s, unsigned int extension)
  461. {
  462. int ret;
  463. ret = kvm_vm_ioctl(s, KVM_CHECK_EXTENSION, extension);
  464. if (ret < 0) {
  465. /* VM wide version not implemented, use global one instead */
  466. ret = kvm_check_extension(s, extension);
  467. }
  468. return ret;
  469. }
  470. static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
  471. {
  472. #if defined(HOST_WORDS_BIGENDIAN) != defined(TARGET_WORDS_BIGENDIAN)
  473. /* The kernel expects ioeventfd values in HOST_WORDS_BIGENDIAN
  474. * endianness, but the memory core hands them in target endianness.
  475. * For example, PPC is always treated as big-endian even if running
  476. * on KVM and on PPC64LE. Correct here.
  477. */
  478. switch (size) {
  479. case 2:
  480. val = bswap16(val);
  481. break;
  482. case 4:
  483. val = bswap32(val);
  484. break;
  485. }
  486. #endif
  487. return val;
  488. }
  489. static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val,
  490. bool assign, uint32_t size, bool datamatch)
  491. {
  492. int ret;
  493. struct kvm_ioeventfd iofd = {
  494. .datamatch = datamatch ? adjust_ioeventfd_endianness(val, size) : 0,
  495. .addr = addr,
  496. .len = size,
  497. .flags = 0,
  498. .fd = fd,
  499. };
  500. if (!kvm_enabled()) {
  501. return -ENOSYS;
  502. }
  503. if (datamatch) {
  504. iofd.flags |= KVM_IOEVENTFD_FLAG_DATAMATCH;
  505. }
  506. if (!assign) {
  507. iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
  508. }
  509. ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);
  510. if (ret < 0) {
  511. return -errno;
  512. }
  513. return 0;
  514. }
  515. static int kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint16_t val,
  516. bool assign, uint32_t size, bool datamatch)
  517. {
  518. struct kvm_ioeventfd kick = {
  519. .datamatch = datamatch ? adjust_ioeventfd_endianness(val, size) : 0,
  520. .addr = addr,
  521. .flags = KVM_IOEVENTFD_FLAG_PIO,
  522. .len = size,
  523. .fd = fd,
  524. };
  525. int r;
  526. if (!kvm_enabled()) {
  527. return -ENOSYS;
  528. }
  529. if (datamatch) {
  530. kick.flags |= KVM_IOEVENTFD_FLAG_DATAMATCH;
  531. }
  532. if (!assign) {
  533. kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
  534. }
  535. r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
  536. if (r < 0) {
  537. return r;
  538. }
  539. return 0;
  540. }
  541. static int kvm_check_many_ioeventfds(void)
  542. {
  543. /* Userspace can use ioeventfd for io notification. This requires a host
  544. * that supports eventfd(2) and an I/O thread; since eventfd does not
  545. * support SIGIO it cannot interrupt the vcpu.
  546. *
  547. * Older kernels have a 6 device limit on the KVM io bus. Find out so we
  548. * can avoid creating too many ioeventfds.
  549. */
  550. #if defined(CONFIG_EVENTFD)
  551. int ioeventfds[7];
  552. int i, ret = 0;
  553. for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) {
  554. ioeventfds[i] = eventfd(0, EFD_CLOEXEC);
  555. if (ioeventfds[i] < 0) {
  556. break;
  557. }
  558. ret = kvm_set_ioeventfd_pio(ioeventfds[i], 0, i, true, 2, true);
  559. if (ret < 0) {
  560. close(ioeventfds[i]);
  561. break;
  562. }
  563. }
  564. /* Decide whether many devices are supported or not */
  565. ret = i == ARRAY_SIZE(ioeventfds);
  566. while (i-- > 0) {
  567. kvm_set_ioeventfd_pio(ioeventfds[i], 0, i, false, 2, true);
  568. close(ioeventfds[i]);
  569. }
  570. return ret;
  571. #else
  572. return 0;
  573. #endif
  574. }
  575. static const KVMCapabilityInfo *
  576. kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
  577. {
  578. while (list->name) {
  579. if (!kvm_check_extension(s, list->value)) {
  580. return list;
  581. }
  582. list++;
  583. }
  584. return NULL;
  585. }
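/* Register (add == true) or unregister the KVM memory slots backing a
 * MemoryRegionSection. Overlapping slots are torn down first, and prefix
 * and suffix slots are re-registered so that only the requested range
 * actually changes. */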
  586. static void kvm_set_phys_mem(KVMMemoryListener *kml,
  587. MemoryRegionSection *section, bool add)
  588. {
  589. KVMState *s = kvm_state;
  590. KVMSlot *mem, old;
  591. int err;
  592. MemoryRegion *mr = section->mr;
  593. bool writeable = !mr->readonly && !mr->rom_device;
  594. hwaddr start_addr = section->offset_within_address_space;
  595. ram_addr_t size = int128_get64(section->size);
  596. void *ram = NULL;
  597. unsigned delta;
  598. /* kvm works in page size chunks, but the function may be called
  599. with sub-page size and an unaligned start address. Pad the start
  600. address up to the next page boundary and truncate the size down to the previous one. */
  601. delta = qemu_real_host_page_size - (start_addr & ~qemu_real_host_page_mask);
  602. delta &= ~qemu_real_host_page_mask;
  603. if (delta > size) {
  604. return;
  605. }
  606. start_addr += delta;
  607. size -= delta;
  608. size &= qemu_real_host_page_mask;
  609. if (!size || (start_addr & ~qemu_real_host_page_mask)) {
  610. return;
  611. }
  612. if (!memory_region_is_ram(mr)) {
  613. if (writeable || !kvm_readonly_mem_allowed) {
  614. return;
  615. } else if (!mr->romd_mode) {
  616. /* If the memory device is not in romd_mode, then we actually want
  617. * to remove the kvm memory slot so all accesses will trap. */
  618. add = false;
  619. }
  620. }
  621. ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta;
  622. while (1) {
  623. mem = kvm_lookup_overlapping_slot(kml, start_addr, start_addr + size);
  624. if (!mem) {
  625. break;
  626. }
  627. if (add && start_addr >= mem->start_addr &&
  628. (start_addr + size <= mem->start_addr + mem->memory_size) &&
  629. (ram - start_addr == mem->ram - mem->start_addr)) {
  630. /* The new slot fits into the existing one and comes with
  631. * identical parameters - update flags and done. */
  632. kvm_slot_update_flags(kml, mem, mr);
  633. return;
  634. }
  635. old = *mem;
  636. if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
  637. kvm_physical_sync_dirty_bitmap(kml, section);
  638. }
  639. /* unregister the overlapping slot */
  640. mem->memory_size = 0;
  641. err = kvm_set_user_memory_region(kml, mem);
  642. if (err) {
  643. fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
  644. __func__, strerror(-err));
  645. abort();
  646. }
  647. /* Workaround for older KVM versions: we can't join slots, not even by
  648. * unregistering the previous ones and then registering the larger
  649. * slot. We have to maintain the existing fragmentation. Sigh.
  650. *
  651. * This workaround assumes that the new slot starts at the same
  652. * address as the first existing one. If not or if some overlapping
  653. * slot comes around later, we will fail (not seen in practice so far)
  654. * - and actually require a recent KVM version. */
  655. if (s->broken_set_mem_region &&
  656. old.start_addr == start_addr && old.memory_size < size && add) {
  657. mem = kvm_alloc_slot(kml);
  658. mem->memory_size = old.memory_size;
  659. mem->start_addr = old.start_addr;
  660. mem->ram = old.ram;
  661. mem->flags = kvm_mem_flags(mr);
  662. err = kvm_set_user_memory_region(kml, mem);
  663. if (err) {
  664. fprintf(stderr, "%s: error updating slot: %s\n", __func__,
  665. strerror(-err));
  666. abort();
  667. }
  668. start_addr += old.memory_size;
  669. ram += old.memory_size;
  670. size -= old.memory_size;
  671. continue;
  672. }
  673. /* register prefix slot */
  674. if (old.start_addr < start_addr) {
  675. mem = kvm_alloc_slot(kml);
  676. mem->memory_size = start_addr - old.start_addr;
  677. mem->start_addr = old.start_addr;
  678. mem->ram = old.ram;
  679. mem->flags = kvm_mem_flags(mr);
  680. err = kvm_set_user_memory_region(kml, mem);
  681. if (err) {
  682. fprintf(stderr, "%s: error registering prefix slot: %s\n",
  683. __func__, strerror(-err));
  684. #ifdef TARGET_PPC
  685. fprintf(stderr, "%s: This is probably because your kernel's " \
  686. "PAGE_SIZE is too big. Please try to use 4k " \
  687. "PAGE_SIZE!\n", __func__);
  688. #endif
  689. abort();
  690. }
  691. }
  692. /* register suffix slot */
  693. if (old.start_addr + old.memory_size > start_addr + size) {
  694. ram_addr_t size_delta;
  695. mem = kvm_alloc_slot(kml);
  696. mem->start_addr = start_addr + size;
  697. size_delta = mem->start_addr - old.start_addr;
  698. mem->memory_size = old.memory_size - size_delta;
  699. mem->ram = old.ram + size_delta;
  700. mem->flags = kvm_mem_flags(mr);
  701. err = kvm_set_user_memory_region(kml, mem);
  702. if (err) {
  703. fprintf(stderr, "%s: error registering suffix slot: %s\n",
  704. __func__, strerror(-err));
  705. abort();
  706. }
  707. }
  708. }
  709. /* in case the KVM bug workaround already "consumed" the new slot */
  710. if (!size) {
  711. return;
  712. }
  713. if (!add) {
  714. return;
  715. }
  716. mem = kvm_alloc_slot(kml);
  717. mem->memory_size = size;
  718. mem->start_addr = start_addr;
  719. mem->ram = ram;
  720. mem->flags = kvm_mem_flags(mr);
  721. err = kvm_set_user_memory_region(kml, mem);
  722. if (err) {
  723. fprintf(stderr, "%s: error registering slot: %s\n", __func__,
  724. strerror(-err));
  725. abort();
  726. }
  727. }
  728. static void kvm_region_add(MemoryListener *listener,
  729. MemoryRegionSection *section)
  730. {
  731. KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
  732. memory_region_ref(section->mr);
  733. kvm_set_phys_mem(kml, section, true);
  734. }
  735. static void kvm_region_del(MemoryListener *listener,
  736. MemoryRegionSection *section)
  737. {
  738. KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
  739. kvm_set_phys_mem(kml, section, false);
  740. memory_region_unref(section->mr);
  741. }
  742. static void kvm_log_sync(MemoryListener *listener,
  743. MemoryRegionSection *section)
  744. {
  745. KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
  746. int r;
  747. r = kvm_physical_sync_dirty_bitmap(kml, section);
  748. if (r < 0) {
  749. abort();
  750. }
  751. }
  752. static void kvm_mem_ioeventfd_add(MemoryListener *listener,
  753. MemoryRegionSection *section,
  754. bool match_data, uint64_t data,
  755. EventNotifier *e)
  756. {
  757. int fd = event_notifier_get_fd(e);
  758. int r;
  759. r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
  760. data, true, int128_get64(section->size),
  761. match_data);
  762. if (r < 0) {
  763. fprintf(stderr, "%s: error adding ioeventfd: %s\n",
  764. __func__, strerror(-r));
  765. abort();
  766. }
  767. }
  768. static void kvm_mem_ioeventfd_del(MemoryListener *listener,
  769. MemoryRegionSection *section,
  770. bool match_data, uint64_t data,
  771. EventNotifier *e)
  772. {
  773. int fd = event_notifier_get_fd(e);
  774. int r;
  775. r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
  776. data, false, int128_get64(section->size),
  777. match_data);
  778. if (r < 0) {
  779. abort();
  780. }
  781. }
  782. static void kvm_io_ioeventfd_add(MemoryListener *listener,
  783. MemoryRegionSection *section,
  784. bool match_data, uint64_t data,
  785. EventNotifier *e)
  786. {
  787. int fd = event_notifier_get_fd(e);
  788. int r;
  789. r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
  790. data, true, int128_get64(section->size),
  791. match_data);
  792. if (r < 0) {
  793. fprintf(stderr, "%s: error adding ioeventfd: %s\n",
  794. __func__, strerror(-r));
  795. abort();
  796. }
  797. }
  798. static void kvm_io_ioeventfd_del(MemoryListener *listener,
  799. MemoryRegionSection *section,
  800. bool match_data, uint64_t data,
  801. EventNotifier *e)
  802. {
  803. int fd = event_notifier_get_fd(e);
  804. int r;
  805. r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
  806. data, false, int128_get64(section->size),
  807. match_data);
  808. if (r < 0) {
  809. abort();
  810. }
  811. }
  812. void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
  813. AddressSpace *as, int as_id)
  814. {
  815. int i;
  816. kml->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot));
  817. kml->as_id = as_id;
  818. for (i = 0; i < s->nr_slots; i++) {
  819. kml->slots[i].slot = i;
  820. }
  821. kml->listener.region_add = kvm_region_add;
  822. kml->listener.region_del = kvm_region_del;
  823. kml->listener.log_start = kvm_log_start;
  824. kml->listener.log_stop = kvm_log_stop;
  825. kml->listener.log_sync = kvm_log_sync;
  826. kml->listener.priority = 10;
  827. memory_listener_register(&kml->listener, as);
  828. }
  829. static MemoryListener kvm_io_listener = {
  830. .eventfd_add = kvm_io_ioeventfd_add,
  831. .eventfd_del = kvm_io_ioeventfd_del,
  832. .priority = 10,
  833. };
  834. static void kvm_handle_interrupt(CPUState *cpu, int mask)
  835. {
  836. cpu->interrupt_request |= mask;
  837. if (!qemu_cpu_is_self(cpu)) {
  838. qemu_cpu_kick(cpu);
  839. }
  840. }
  841. int kvm_set_irq(KVMState *s, int irq, int level)
  842. {
  843. struct kvm_irq_level event;
  844. int ret;
  845. assert(kvm_async_interrupts_enabled());
  846. event.level = level;
  847. event.irq = irq;
  848. ret = kvm_vm_ioctl(s, s->irq_set_ioctl, &event);
  849. if (ret < 0) {
  850. perror("kvm_set_irq");
  851. abort();
  852. }
  853. return (s->irq_set_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
  854. }
  855. #ifdef KVM_CAP_IRQ_ROUTING
  856. typedef struct KVMMSIRoute {
  857. struct kvm_irq_routing_entry kroute;
  858. QTAILQ_ENTRY(KVMMSIRoute) entry;
  859. } KVMMSIRoute;
  860. static void set_gsi(KVMState *s, unsigned int gsi)
  861. {
  862. set_bit(gsi, s->used_gsi_bitmap);
  863. }
  864. static void clear_gsi(KVMState *s, unsigned int gsi)
  865. {
  866. clear_bit(gsi, s->used_gsi_bitmap);
  867. }
  868. void kvm_init_irq_routing(KVMState *s)
  869. {
  870. int gsi_count, i;
  871. gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING) - 1;
  872. if (gsi_count > 0) {
  873. /* Round up so we can search ints using ffs */
  874. s->used_gsi_bitmap = bitmap_new(gsi_count);
  875. s->gsi_count = gsi_count;
  876. }
  877. s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
  878. s->nr_allocated_irq_routes = 0;
  879. if (!kvm_direct_msi_allowed) {
  880. for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) {
  881. QTAILQ_INIT(&s->msi_hashtab[i]);
  882. }
  883. }
  884. kvm_arch_init_irq_routing(s);
  885. }
  886. void kvm_irqchip_commit_routes(KVMState *s)
  887. {
  888. int ret;
  889. if (kvm_gsi_direct_mapping()) {
  890. return;
  891. }
  892. if (!kvm_gsi_routing_enabled()) {
  893. return;
  894. }
  895. s->irq_routes->flags = 0;
  896. trace_kvm_irqchip_commit_routes();
  897. ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
  898. assert(ret == 0);
  899. }
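/* Append an entry to the cached GSI routing table, growing the table as
 * needed; the table only reaches the kernel when
 * kvm_irqchip_commit_routes() issues KVM_SET_GSI_ROUTING. */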
  900. static void kvm_add_routing_entry(KVMState *s,
  901. struct kvm_irq_routing_entry *entry)
  902. {
  903. struct kvm_irq_routing_entry *new;
  904. int n, size;
  905. if (s->irq_routes->nr == s->nr_allocated_irq_routes) {
  906. n = s->nr_allocated_irq_routes * 2;
  907. if (n < 64) {
  908. n = 64;
  909. }
  910. size = sizeof(struct kvm_irq_routing);
  911. size += n * sizeof(*new);
  912. s->irq_routes = g_realloc(s->irq_routes, size);
  913. s->nr_allocated_irq_routes = n;
  914. }
  915. n = s->irq_routes->nr++;
  916. new = &s->irq_routes->entries[n];
  917. *new = *entry;
  918. set_gsi(s, entry->gsi);
  919. }
  920. static int kvm_update_routing_entry(KVMState *s,
  921. struct kvm_irq_routing_entry *new_entry)
  922. {
  923. struct kvm_irq_routing_entry *entry;
  924. int n;
  925. for (n = 0; n < s->irq_routes->nr; n++) {
  926. entry = &s->irq_routes->entries[n];
  927. if (entry->gsi != new_entry->gsi) {
  928. continue;
  929. }
  930. if(!memcmp(entry, new_entry, sizeof *entry)) {
  931. return 0;
  932. }
  933. *entry = *new_entry;
  934. return 0;
  935. }
  936. return -ESRCH;
  937. }
  938. void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin)
  939. {
  940. struct kvm_irq_routing_entry e = {};
  941. assert(pin < s->gsi_count);
  942. e.gsi = irq;
  943. e.type = KVM_IRQ_ROUTING_IRQCHIP;
  944. e.flags = 0;
  945. e.u.irqchip.irqchip = irqchip;
  946. e.u.irqchip.pin = pin;
  947. kvm_add_routing_entry(s, &e);
  948. }
  949. void kvm_irqchip_release_virq(KVMState *s, int virq)
  950. {
  951. struct kvm_irq_routing_entry *e;
  952. int i;
  953. if (kvm_gsi_direct_mapping()) {
  954. return;
  955. }
  956. for (i = 0; i < s->irq_routes->nr; i++) {
  957. e = &s->irq_routes->entries[i];
  958. if (e->gsi == virq) {
  959. s->irq_routes->nr--;
  960. *e = s->irq_routes->entries[s->irq_routes->nr];
  961. }
  962. }
  963. clear_gsi(s, virq);
  964. kvm_arch_release_virq_post(virq);
  965. }
  966. static unsigned int kvm_hash_msi(uint32_t data)
  967. {
  968. /* This is optimized for IA32 MSI layout. However, no other arch shall
  969. * repeat the mistake of not providing a direct MSI injection API. */
  970. return data & 0xff;
  971. }
  972. static void kvm_flush_dynamic_msi_routes(KVMState *s)
  973. {
  974. KVMMSIRoute *route, *next;
  975. unsigned int hash;
  976. for (hash = 0; hash < KVM_MSI_HASHTAB_SIZE; hash++) {
  977. QTAILQ_FOREACH_SAFE(route, &s->msi_hashtab[hash], entry, next) {
  978. kvm_irqchip_release_virq(s, route->kroute.gsi);
  979. QTAILQ_REMOVE(&s->msi_hashtab[hash], route, entry);
  980. g_free(route);
  981. }
  982. }
  983. }
  984. static int kvm_irqchip_get_virq(KVMState *s)
  985. {
  986. int next_virq;
  987. /*
  988. * PIC and IOAPIC share the first 16 GSI numbers, thus there are more
  989. * available GSI numbers than IRQ routes. Allocating a GSI
  990. * number can succeed even though a new route entry cannot be added.
  991. * When this happens, flush dynamic MSI entries to free IRQ route entries.
  992. */
  993. if (!kvm_direct_msi_allowed && s->irq_routes->nr == s->gsi_count) {
  994. kvm_flush_dynamic_msi_routes(s);
  995. }
  996. /* Return the lowest unused GSI in the bitmap */
  997. next_virq = find_first_zero_bit(s->used_gsi_bitmap, s->gsi_count);
  998. if (next_virq >= s->gsi_count) {
  999. return -ENOSPC;
  1000. } else {
  1001. return next_virq;
  1002. }
  1003. }
  1004. static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg)
  1005. {
  1006. unsigned int hash = kvm_hash_msi(msg.data);
  1007. KVMMSIRoute *route;
  1008. QTAILQ_FOREACH(route, &s->msi_hashtab[hash], entry) {
  1009. if (route->kroute.u.msi.address_lo == (uint32_t)msg.address &&
  1010. route->kroute.u.msi.address_hi == (msg.address >> 32) &&
  1011. route->kroute.u.msi.data == le32_to_cpu(msg.data)) {
  1012. return route;
  1013. }
  1014. }
  1015. return NULL;
  1016. }
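/* Inject an MSI: use KVM_SIGNAL_MSI directly when the kernel supports it,
 * otherwise allocate (and cache) a routing entry for the message and raise
 * the corresponding GSI. */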
  1017. int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
  1018. {
  1019. struct kvm_msi msi;
  1020. KVMMSIRoute *route;
  1021. if (kvm_direct_msi_allowed) {
  1022. msi.address_lo = (uint32_t)msg.address;
  1023. msi.address_hi = msg.address >> 32;
  1024. msi.data = le32_to_cpu(msg.data);
  1025. msi.flags = 0;
  1026. memset(msi.pad, 0, sizeof(msi.pad));
  1027. return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi);
  1028. }
  1029. route = kvm_lookup_msi_route(s, msg);
  1030. if (!route) {
  1031. int virq;
  1032. virq = kvm_irqchip_get_virq(s);
  1033. if (virq < 0) {
  1034. return virq;
  1035. }
  1036. route = g_malloc0(sizeof(KVMMSIRoute));
  1037. route->kroute.gsi = virq;
  1038. route->kroute.type = KVM_IRQ_ROUTING_MSI;
  1039. route->kroute.flags = 0;
  1040. route->kroute.u.msi.address_lo = (uint32_t)msg.address;
  1041. route->kroute.u.msi.address_hi = msg.address >> 32;
  1042. route->kroute.u.msi.data = le32_to_cpu(msg.data);
  1043. kvm_add_routing_entry(s, &route->kroute);
  1044. kvm_irqchip_commit_routes(s);
  1045. QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route,
  1046. entry);
  1047. }
  1048. assert(route->kroute.type == KVM_IRQ_ROUTING_MSI);
  1049. return kvm_set_irq(s, route->kroute.gsi, 1);
  1050. }
  1051. int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
  1052. {
  1053. struct kvm_irq_routing_entry kroute = {};
  1054. int virq;
  1055. MSIMessage msg = {0, 0};
  1056. if (dev) {
  1057. msg = pci_get_msi_message(dev, vector);
  1058. }
  1059. if (kvm_gsi_direct_mapping()) {
  1060. return kvm_arch_msi_data_to_gsi(msg.data);
  1061. }
  1062. if (!kvm_gsi_routing_enabled()) {
  1063. return -ENOSYS;
  1064. }
  1065. virq = kvm_irqchip_get_virq(s);
  1066. if (virq < 0) {
  1067. return virq;
  1068. }
  1069. kroute.gsi = virq;
  1070. kroute.type = KVM_IRQ_ROUTING_MSI;
  1071. kroute.flags = 0;
  1072. kroute.u.msi.address_lo = (uint32_t)msg.address;
  1073. kroute.u.msi.address_hi = msg.address >> 32;
  1074. kroute.u.msi.data = le32_to_cpu(msg.data);
  1075. if (kvm_msi_devid_required()) {
  1076. kroute.flags = KVM_MSI_VALID_DEVID;
  1077. kroute.u.msi.devid = pci_requester_id(dev);
  1078. }
  1079. if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
  1080. kvm_irqchip_release_virq(s, virq);
  1081. return -EINVAL;
  1082. }
  1083. trace_kvm_irqchip_add_msi_route(virq);
  1084. kvm_add_routing_entry(s, &kroute);
  1085. kvm_arch_add_msi_route_post(&kroute, vector, dev);
  1086. kvm_irqchip_commit_routes(s);
  1087. return virq;
  1088. }
  1089. int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
  1090. PCIDevice *dev)
  1091. {
  1092. struct kvm_irq_routing_entry kroute = {};
  1093. if (kvm_gsi_direct_mapping()) {
  1094. return 0;
  1095. }
  1096. if (!kvm_irqchip_in_kernel()) {
  1097. return -ENOSYS;
  1098. }
  1099. kroute.gsi = virq;
  1100. kroute.type = KVM_IRQ_ROUTING_MSI;
  1101. kroute.flags = 0;
  1102. kroute.u.msi.address_lo = (uint32_t)msg.address;
  1103. kroute.u.msi.address_hi = msg.address >> 32;
  1104. kroute.u.msi.data = le32_to_cpu(msg.data);
  1105. if (kvm_msi_devid_required()) {
  1106. kroute.flags = KVM_MSI_VALID_DEVID;
  1107. kroute.u.msi.devid = pci_requester_id(dev);
  1108. }
  1109. if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
  1110. return -EINVAL;
  1111. }
  1112. trace_kvm_irqchip_update_msi_route(virq);
  1113. return kvm_update_routing_entry(s, &kroute);
  1114. }
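/* Attach (assign == true) or detach an eventfd to the given GSI via
 * KVM_IRQFD; an optional resample fd supports level-triggered interrupts. */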
  1115. static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
  1116. bool assign)
  1117. {
  1118. struct kvm_irqfd irqfd = {
  1119. .fd = fd,
  1120. .gsi = virq,
  1121. .flags = assign ? 0 : KVM_IRQFD_FLAG_DEASSIGN,
  1122. };
  1123. if (rfd != -1) {
  1124. irqfd.flags |= KVM_IRQFD_FLAG_RESAMPLE;
  1125. irqfd.resamplefd = rfd;
  1126. }
  1127. if (!kvm_irqfds_enabled()) {
  1128. return -ENOSYS;
  1129. }
  1130. return kvm_vm_ioctl(s, KVM_IRQFD, &irqfd);
  1131. }
  1132. int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter)
  1133. {
  1134. struct kvm_irq_routing_entry kroute = {};
  1135. int virq;
  1136. if (!kvm_gsi_routing_enabled()) {
  1137. return -ENOSYS;
  1138. }
  1139. virq = kvm_irqchip_get_virq(s);
  1140. if (virq < 0) {
  1141. return virq;
  1142. }
  1143. kroute.gsi = virq;
  1144. kroute.type = KVM_IRQ_ROUTING_S390_ADAPTER;
  1145. kroute.flags = 0;
  1146. kroute.u.adapter.summary_addr = adapter->summary_addr;
  1147. kroute.u.adapter.ind_addr = adapter->ind_addr;
  1148. kroute.u.adapter.summary_offset = adapter->summary_offset;
  1149. kroute.u.adapter.ind_offset = adapter->ind_offset;
  1150. kroute.u.adapter.adapter_id = adapter->adapter_id;
  1151. kvm_add_routing_entry(s, &kroute);
  1152. return virq;
  1153. }
  1154. int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint)
  1155. {
  1156. struct kvm_irq_routing_entry kroute = {};
  1157. int virq;
  1158. if (!kvm_gsi_routing_enabled()) {
  1159. return -ENOSYS;
  1160. }
  1161. if (!kvm_check_extension(s, KVM_CAP_HYPERV_SYNIC)) {
  1162. return -ENOSYS;
  1163. }
  1164. virq = kvm_irqchip_get_virq(s);
  1165. if (virq < 0) {
  1166. return virq;
  1167. }
  1168. kroute.gsi = virq;
  1169. kroute.type = KVM_IRQ_ROUTING_HV_SINT;
  1170. kroute.flags = 0;
  1171. kroute.u.hv_sint.vcpu = vcpu;
  1172. kroute.u.hv_sint.sint = sint;
  1173. kvm_add_routing_entry(s, &kroute);
  1174. kvm_irqchip_commit_routes(s);
  1175. return virq;
  1176. }
  1177. #else /* !KVM_CAP_IRQ_ROUTING */
  1178. void kvm_init_irq_routing(KVMState *s)
  1179. {
  1180. }
  1181. void kvm_irqchip_release_virq(KVMState *s, int virq)
  1182. {
  1183. }
  1184. int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
  1185. {
  1186. abort();
  1187. }
  1188. int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
  1189. {
  1190. return -ENOSYS;
  1191. }
  1192. int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter)
  1193. {
  1194. return -ENOSYS;
  1195. }
  1196. int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint)
  1197. {
  1198. return -ENOSYS;
  1199. }
  1200. static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign)
  1201. {
  1202. abort();
  1203. }
  1204. int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
  1205. {
  1206. return -ENOSYS;
  1207. }
  1208. #endif /* !KVM_CAP_IRQ_ROUTING */
  1209. int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
  1210. EventNotifier *rn, int virq)
  1211. {
  1212. return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n),
  1213. rn ? event_notifier_get_fd(rn) : -1, virq, true);
  1214. }
  1215. int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
  1216. int virq)
  1217. {
  1218. return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), -1, virq,
  1219. false);
  1220. }
  1221. int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
  1222. EventNotifier *rn, qemu_irq irq)
  1223. {
  1224. gpointer key, gsi;
  1225. gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi);
  1226. if (!found) {
  1227. return -ENXIO;
  1228. }
  1229. return kvm_irqchip_add_irqfd_notifier_gsi(s, n, rn, GPOINTER_TO_INT(gsi));
  1230. }
  1231. int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n,
  1232. qemu_irq irq)
  1233. {
  1234. gpointer key, gsi;
  1235. gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi);
  1236. if (!found) {
  1237. return -ENXIO;
  1238. }
  1239. return kvm_irqchip_remove_irqfd_notifier_gsi(s, n, GPOINTER_TO_INT(gsi));
  1240. }
  1241. void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi)
  1242. {
  1243. g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi));
  1244. }
  1245. static void kvm_irqchip_create(MachineState *machine, KVMState *s)
  1246. {
  1247. int ret;
  1248. if (kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
  1249. ;
  1250. } else if (kvm_check_extension(s, KVM_CAP_S390_IRQCHIP)) {
  1251. ret = kvm_vm_enable_cap(s, KVM_CAP_S390_IRQCHIP, 0);
  1252. if (ret < 0) {
  1253. fprintf(stderr, "Enable kernel irqchip failed: %s\n", strerror(-ret));
  1254. exit(1);
  1255. }
  1256. } else {
  1257. return;
  1258. }
  1259. /* First probe and see if there's an arch-specific hook to create the
  1260. * in-kernel irqchip for us */
  1261. ret = kvm_arch_irqchip_create(machine, s);
  1262. if (ret == 0) {
  1263. if (machine_kernel_irqchip_split(machine)) {
  1264. perror("Split IRQ chip mode not supported.");
  1265. exit(1);
  1266. } else {
  1267. ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
  1268. }
  1269. }
  1270. if (ret < 0) {
  1271. fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret));
  1272. exit(1);
  1273. }
  1274. kvm_kernel_irqchip = true;
  1275. /* If we have an in-kernel IRQ chip then we must have asynchronous
  1276. * interrupt delivery (though the reverse is not necessarily true)
  1277. */
  1278. kvm_async_interrupts_allowed = true;
  1279. kvm_halt_in_kernel_allowed = true;
  1280. kvm_init_irq_routing(s);
  1281. s->gsimap = g_hash_table_new(g_direct_hash, g_direct_equal);
  1282. }
  1283. /* Find number of supported CPUs using the recommended
  1284. * procedure from the kernel API documentation to cope with
  1285. * older kernels that may be missing capabilities.
  1286. */
  1287. static int kvm_recommended_vcpus(KVMState *s)
  1288. {
  1289. int ret = kvm_check_extension(s, KVM_CAP_NR_VCPUS);
  1290. return (ret) ? ret : 4;
  1291. }
  1292. static int kvm_max_vcpus(KVMState *s)
  1293. {
  1294. int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
  1295. return (ret) ? ret : kvm_recommended_vcpus(s);
  1296. }
  1297. static int kvm_max_vcpu_id(KVMState *s)
  1298. {
  1299. int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID);
  1300. return (ret) ? ret : kvm_max_vcpus(s);
  1301. }
  1302. bool kvm_vcpu_id_is_valid(int vcpu_id)
  1303. {
  1304. KVMState *s = KVM_STATE(current_machine->accelerator);
  1305. return vcpu_id >= 0 && vcpu_id < kvm_max_vcpu_id(s);
  1306. }
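/* Accelerator entry point: open /dev/kvm, create the VM, probe the
 * capabilities cached in KVMState and register the memory listeners. */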
static int kvm_init(MachineState *ms)
{
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    struct {
        const char *name;
        int num;
    } num_cpus[] = {
        { "SMP",          smp_cpus },
        { "hotpluggable", max_cpus },
        { NULL, }
    }, *nc = num_cpus;
    int soft_vcpus_limit, hard_vcpus_limit;
    KVMState *s;
    const KVMCapabilityInfo *missing_cap;
    int ret;
    int type = 0;
    const char *kvm_type;

    s = KVM_STATE(ms->accelerator);

    /*
     * On systems where the kernel can support different base page
     * sizes, host page size may be different from TARGET_PAGE_SIZE,
     * even with KVM. TARGET_PAGE_SIZE is assumed to be the minimum
     * page size for the system though.
     */
    assert(TARGET_PAGE_SIZE <= getpagesize());

    s->sigmask_len = 8;

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    QLIST_INIT(&s->kvm_parked_vcpus);
    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret >= 0) {
            ret = -EINVAL;
        }
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
    s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);

    /* If unspecified, use the default value */
    if (!s->nr_slots) {
        s->nr_slots = 32;
    }

    /* check the vcpu limits */
    soft_vcpus_limit = kvm_recommended_vcpus(s);
    hard_vcpus_limit = kvm_max_vcpus(s);

    while (nc->name) {
        if (nc->num > soft_vcpus_limit) {
            fprintf(stderr,
                    "Warning: Number of %s cpus requested (%d) exceeds "
                    "the recommended cpus supported by KVM (%d)\n",
                    nc->name, nc->num, soft_vcpus_limit);

            if (nc->num > hard_vcpus_limit) {
                fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
                        "the maximum cpus supported by KVM (%d)\n",
                        nc->name, nc->num, hard_vcpus_limit);
                exit(1);
            }
        }
        nc++;
    }

    kvm_type = qemu_opt_get(qemu_get_machine_opts(), "kvm-type");
    if (mc->kvm_type) {
        type = mc->kvm_type(kvm_type);
    } else if (kvm_type) {
        ret = -EINVAL;
        fprintf(stderr, "Invalid argument kvm-type=%s\n", kvm_type);
        goto err;
    }

    do {
        ret = kvm_ioctl(s, KVM_CREATE_VM, type);
    } while (ret == -EINTR);

    if (ret < 0) {
        fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -ret,
                strerror(-ret));

#ifdef TARGET_S390X
        if (ret == -EINVAL) {
            fprintf(stderr,
                    "Host kernel setup problem detected. Please verify:\n");
            fprintf(stderr, "- for kernels supporting the switch_amode or"
                    " user_mode parameters, whether\n");
            fprintf(stderr,
                    " user space is running in primary address space\n");
            fprintf(stderr,
                    "- for kernels supporting the vm.allocate_pgste sysctl, "
                    "whether it is enabled\n");
        }
#endif
        goto err;
    }

    s->vmfd = ret;
    missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
    if (!missing_cap) {
        missing_cap =
            kvm_check_extension_list(s, kvm_arch_required_capabilities);
    }
    if (missing_cap) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support %s\n%s",
                missing_cap->name, upgrade_note);
        goto err;
    }

    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);

    s->broken_set_mem_region = 1;
    ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }

#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);

#ifdef KVM_CAP_DEBUGREGS
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

#ifdef KVM_CAP_IRQ_ROUTING
    kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
#endif

    s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);

    s->irq_set_ioctl = KVM_IRQ_LINE;
    if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
        s->irq_set_ioctl = KVM_IRQ_LINE_STATUS;
    }

#ifdef KVM_CAP_READONLY_MEM
    kvm_readonly_mem_allowed =
        (kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0);
#endif

    kvm_eventfds_allowed =
        (kvm_check_extension(s, KVM_CAP_IOEVENTFD) > 0);

    kvm_irqfds_allowed =
        (kvm_check_extension(s, KVM_CAP_IRQFD) > 0);

    kvm_resamplefds_allowed =
        (kvm_check_extension(s, KVM_CAP_IRQFD_RESAMPLE) > 0);

    kvm_vm_attributes_allowed =
        (kvm_check_extension(s, KVM_CAP_VM_ATTRIBUTES) > 0);

    kvm_ioeventfd_any_length_allowed =
        (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);

    ret = kvm_arch_init(ms, s);
    if (ret < 0) {
        goto err;
    }

    if (machine_kernel_irqchip_allowed(ms)) {
        kvm_irqchip_create(ms, s);
    }

    kvm_state = s;

    if (kvm_eventfds_allowed) {
        s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
        s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
    }
    s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
    s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region;

    kvm_memory_listener_register(s, &s->memory_listener,
                                 &address_space_memory, 0);
    memory_listener_register(&kvm_io_listener,
                             &address_space_io);

    s->many_ioeventfds = kvm_check_many_ioeventfds();

    cpu_interrupt_handler = kvm_handle_interrupt;

    return 0;

err:
    assert(ret < 0);
    if (s->vmfd >= 0) {
        close(s->vmfd);
    }
    if (s->fd != -1) {
        close(s->fd);
    }
    g_free(s->memory_listener.slots);

    return ret;
}

void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len)
{
    s->sigmask_len = sigmask_len;
}
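
/* Forward a KVM_EXIT_IO request to the I/O address space, one item at a time. */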
static void kvm_handle_io(uint16_t port, MemTxAttrs attrs, void *data,
                          int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        address_space_rw(&address_space_io, port, attrs,
                         ptr, size,
                         direction == KVM_EXIT_IO_OUT);
        ptr += size;
    }
}
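
/*
 * Report a KVM_EXIT_INTERNAL_ERROR, dumping any extra data the kernel
 * provides; an emulation failure may be tolerated per-arch, otherwise a
 * negative return makes the caller stop the VM.
 */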
static int kvm_handle_internal_error(CPUState *cpu, struct kvm_run *run)
{
    fprintf(stderr, "KVM internal error. Suberror: %d\n",
            run->internal.suberror);

    if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
        int i;

        for (i = 0; i < run->internal.ndata; ++i) {
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
                    i, (uint64_t)run->internal.data[i]);
        }
    }
    if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
        fprintf(stderr, "emulation failure\n");
        if (!kvm_arch_stop_on_emulation_error(cpu)) {
            cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
            return EXCP_INTERRUPT;
        }
    }
    /* FIXME: Should trigger a qmp message to let management know
     * something went wrong.
     */
    return -1;
}
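
/* Replay every entry queued in the coalesced MMIO ring into guest memory. */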
void kvm_flush_coalesced_mmio_buffer(void)
{
    KVMState *s = kvm_state;

    if (s->coalesced_flush_in_progress) {
        return;
    }

    s->coalesced_flush_in_progress = true;

    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];
            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            smp_wmb();
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }

    s->coalesced_flush_in_progress = false;
}
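
/*
 * Register synchronisation between CPUState and the kernel: state is
 * fetched lazily and marked dirty, then written back before the next
 * KVM_RUN, after reset, or after init.
 */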
static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
    if (!cpu->kvm_vcpu_dirty) {
        kvm_arch_get_registers(cpu);
        cpu->kvm_vcpu_dirty = true;
    }
}

void kvm_cpu_synchronize_state(CPUState *cpu)
{
    if (!cpu->kvm_vcpu_dirty) {
        run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
    }
}

static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg)
{
    kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);
    cpu->kvm_vcpu_dirty = false;
}

void kvm_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}

static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
{
    kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);
    cpu->kvm_vcpu_dirty = false;
}

void kvm_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
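
/* Per-thread record of a pending SIGBUS (MCE) to be injected from the VCPU thread. */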
#ifdef KVM_HAVE_MCE_INJECTION
static __thread void *pending_sigbus_addr;
static __thread int pending_sigbus_code;
static __thread bool have_sigbus_pending;
#endif
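
/* Force the current VCPU out of KVM_RUN, preferring immediate_exit over a self-signal. */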
static void kvm_cpu_kick(CPUState *cpu)
{
    atomic_set(&cpu->kvm_run->immediate_exit, 1);
}

static void kvm_cpu_kick_self(void)
{
    if (kvm_immediate_exit) {
        kvm_cpu_kick(current_cpu);
    } else {
        qemu_cpu_kick_self();
    }
}
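
/*
 * Clear a pending self-kick: either reset run->immediate_exit or drain
 * any pending SIG_IPI when the kernel lacks KVM_CAP_IMMEDIATE_EXIT.
 */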
static void kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    if (kvm_immediate_exit) {
        atomic_set(&cpu->kvm_run->immediate_exit, 0);
        /* Write kvm_run->immediate_exit before the cpu->exit_request
         * write in kvm_cpu_exec.
         */
        smp_wmb();
        return;
    }

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI));
}
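
/*
 * Main VCPU loop: write back dirty registers if needed, enter KVM_RUN
 * outside the BQL, and dispatch each exit reason until a non-zero exit
 * code is produced.
 */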
int kvm_cpu_exec(CPUState *cpu)
{
    struct kvm_run *run = cpu->kvm_run;
    int ret, run_ret;

    DPRINTF("kvm_cpu_exec()\n");

    if (kvm_arch_process_async_events(cpu)) {
        atomic_set(&cpu->exit_request, 0);
        return EXCP_HLT;
    }

    qemu_mutex_unlock_iothread();

    do {
        MemTxAttrs attrs;

        if (cpu->kvm_vcpu_dirty) {
            kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
            cpu->kvm_vcpu_dirty = false;
        }

        kvm_arch_pre_run(cpu, run);
        if (atomic_read(&cpu->exit_request)) {
            DPRINTF("interrupt exit requested\n");
            /*
             * KVM requires us to reenter the kernel after IO exits to complete
             * instruction emulation. This self-signal will ensure that we
             * leave ASAP again.
             */
            kvm_cpu_kick_self();
        }

        /* Read cpu->exit_request before KVM_RUN reads run->immediate_exit.
         * Matching barrier in kvm_eat_signals.
         */
        smp_rmb();

        run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, 0);

        attrs = kvm_arch_post_run(cpu, run);

#ifdef KVM_HAVE_MCE_INJECTION
        if (unlikely(have_sigbus_pending)) {
            qemu_mutex_lock_iothread();
            kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code,
                                    pending_sigbus_addr);
            have_sigbus_pending = false;
            qemu_mutex_unlock_iothread();
        }
#endif

        if (run_ret < 0) {
            if (run_ret == -EINTR || run_ret == -EAGAIN) {
                DPRINTF("io window exit\n");
                kvm_eat_signals(cpu);
                ret = EXCP_INTERRUPT;
                break;
            }
            fprintf(stderr, "error: kvm run failed %s\n",
                    strerror(-run_ret));
#ifdef TARGET_PPC
            if (run_ret == -EBUSY) {
                fprintf(stderr,
                        "This is probably because your SMT is enabled.\n"
                        "VCPU can only run on primary threads with all "
                        "secondary threads offline.\n");
            }
#endif
            ret = -1;
            break;
        }

        trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            DPRINTF("handle_io\n");
            /* Called outside BQL */
            kvm_handle_io(run->io.port, attrs,
                          (uint8_t *)run + run->io.data_offset,
                          run->io.direction,
                          run->io.size,
                          run->io.count);
            ret = 0;
            break;
        case KVM_EXIT_MMIO:
            DPRINTF("handle_mmio\n");
            /* Called outside BQL */
            address_space_rw(&address_space_memory,
                             run->mmio.phys_addr, attrs,
                             run->mmio.data,
                             run->mmio.len,
                             run->mmio.is_write);
            ret = 0;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            DPRINTF("irq_window_open\n");
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_SHUTDOWN:
            DPRINTF("shutdown\n");
            qemu_system_reset_request();
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_UNKNOWN:
            fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
                    (uint64_t)run->hw.hardware_exit_reason);
            ret = -1;
            break;
        case KVM_EXIT_INTERNAL_ERROR:
            ret = kvm_handle_internal_error(cpu, run);
            break;
        case KVM_EXIT_SYSTEM_EVENT:
            switch (run->system_event.type) {
            case KVM_SYSTEM_EVENT_SHUTDOWN:
                qemu_system_shutdown_request();
                ret = EXCP_INTERRUPT;
                break;
            case KVM_SYSTEM_EVENT_RESET:
                qemu_system_reset_request();
                ret = EXCP_INTERRUPT;
                break;
            case KVM_SYSTEM_EVENT_CRASH:
                kvm_cpu_synchronize_state(cpu);
                qemu_mutex_lock_iothread();
                qemu_system_guest_panicked(cpu_get_crash_info(cpu));
                qemu_mutex_unlock_iothread();
                ret = 0;
                break;
            default:
                DPRINTF("kvm_arch_handle_exit\n");
                ret = kvm_arch_handle_exit(cpu, run);
                break;
            }
            break;
        default:
            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(cpu, run);
            break;
        }
    } while (ret == 0);

    qemu_mutex_lock_iothread();

    if (ret < 0) {
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
        vm_stop(RUN_STATE_INTERNAL_ERROR);
    }

    atomic_set(&cpu->exit_request, 0);
    return ret;
}
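
/*
 * Thin ioctl wrappers for the /dev/kvm, VM, VCPU and device file
 * descriptors; all of them return -errno on failure.
 */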
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    trace_kvm_ioctl(type, arg);
    ret = ioctl(s->fd, type, arg);
    if (ret == -1) {
        ret = -errno;
    }
    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    trace_kvm_vm_ioctl(type, arg);
    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1) {
        ret = -errno;
    }
    return ret;
}

int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
    ret = ioctl(cpu->kvm_fd, type, arg);
    if (ret == -1) {
        ret = -errno;
    }
    return ret;
}

int kvm_device_ioctl(int fd, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    trace_kvm_device_ioctl(fd, type, arg);
    ret = ioctl(fd, type, arg);
    if (ret == -1) {
        ret = -errno;
    }
    return ret;
}
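
/*
 * Device attribute helpers: probe attributes with KVM_HAS_DEVICE_ATTR and
 * read or write them via KVM_GET_DEVICE_ATTR / KVM_SET_DEVICE_ATTR.
 */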
int kvm_vm_check_attr(KVMState *s, uint32_t group, uint64_t attr)
{
    int ret;
    struct kvm_device_attr attribute = {
        .group = group,
        .attr = attr,
    };

    if (!kvm_vm_attributes_allowed) {
        return 0;
    }

    ret = kvm_vm_ioctl(s, KVM_HAS_DEVICE_ATTR, &attribute);
    /* kvm returns 0 on success for HAS_DEVICE_ATTR */
    return ret ? 0 : 1;
}

int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
{
    struct kvm_device_attr attribute = {
        .group = group,
        .attr = attr,
        .flags = 0,
    };

    return kvm_device_ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute) ? 0 : 1;
}

void kvm_device_access(int fd, int group, uint64_t attr,
                       void *val, bool write)
{
    struct kvm_device_attr kvmattr;
    int err;

    kvmattr.flags = 0;
    kvmattr.group = group;
    kvmattr.attr = attr;
    kvmattr.addr = (uintptr_t)val;

    err = kvm_device_ioctl(fd,
                           write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
                           &kvmattr);
    if (err < 0) {
        error_report("KVM_%s_DEVICE_ATTR failed: %s",
                     write ? "SET" : "GET", strerror(-err));
        error_printf("Group %d attr 0x%016" PRIx64 "\n", group, attr);
        abort();
    }
}

/* Return 1 on success, 0 on failure */
int kvm_has_sync_mmu(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
}

int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

int kvm_has_debugregs(void)
{
    return kvm_state->debugregs;
}

int kvm_has_many_ioeventfds(void)
{
    if (!kvm_enabled()) {
        return 0;
    }
    return kvm_state->many_ioeventfds;
}

int kvm_has_gsi_routing(void)
{
#ifdef KVM_CAP_IRQ_ROUTING
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#else
    return false;
#endif
}

int kvm_has_intx_set_mask(void)
{
    return kvm_state->intx_set_mask;
}
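
/*
 * Guest-debug support: software/hardware breakpoint bookkeeping and the
 * KVM_SET_GUEST_DEBUG plumbing, with stubs when the capability is absent.
 */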
#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc) {
            return bp;
        }
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *cpu)
{
    return !QTAILQ_EMPTY(&cpu->kvm_state->kvm_sw_breakpoints);
}

struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    int err;
};

static void kvm_invoke_set_guest_debug(CPUState *cpu, run_on_cpu_data data)
{
    struct kvm_set_guest_debug_data *dbg_data =
        (struct kvm_set_guest_debug_data *) data.host_ptr;

    dbg_data->err = kvm_vcpu_ioctl(cpu, KVM_SET_GUEST_DEBUG,
                                   &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = reinject_trap;

    if (cpu->singlestep_enabled) {
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(cpu, &data.dbg);

    run_on_cpu(cpu, kvm_invoke_set_guest_debug,
               RUN_ON_CPU_HOST_PTR(&data));
    return data.err;
}

int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(cpu, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = g_malloc(sizeof(struct kvm_sw_breakpoint));
        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(cpu, bp);
        if (err) {
            g_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    CPU_FOREACH(cpu) {
        err = kvm_update_guest_debug(cpu, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(cpu, addr);
        if (!bp) {
            return -ENOENT;
        }

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(cpu, bp);
        if (err) {
            return err;
        }

        QTAILQ_REMOVE(&cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
        g_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    CPU_FOREACH(cpu) {
        err = kvm_update_guest_debug(cpu, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *cpu)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = cpu->kvm_state;
    CPUState *tmpcpu;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(cpu, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            CPU_FOREACH(tmpcpu) {
                if (kvm_arch_remove_sw_breakpoint(tmpcpu, bp) == 0) {
                    break;
                }
            }
        }
        QTAILQ_REMOVE(&s->kvm_sw_breakpoints, bp, entry);
        g_free(bp);
    }
    kvm_arch_remove_all_hw_breakpoints();

    CPU_FOREACH(cpu) {
        kvm_update_guest_debug(cpu, 0);
    }
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *cpu)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
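
/*
 * VCPU signal setup: install the SIG_IPI handler and select between a plain
 * host signal mask and KVM_SET_SIGNAL_MASK, depending on immediate-exit
 * support.
 */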
static int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset)
{
    KVMState *s = kvm_state;
    struct kvm_signal_mask *sigmask;
    int r;

    sigmask = g_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = s->sigmask_len;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, sigmask);
    g_free(sigmask);

    return r;
}

static void kvm_ipi_signal(int sig)
{
    if (current_cpu) {
        assert(kvm_immediate_exit);
        kvm_cpu_kick(current_cpu);
    }
}

void kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = kvm_ipi_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
#if defined KVM_HAVE_MCE_INJECTION
    sigdelset(&set, SIGBUS);
    pthread_sigmask(SIG_SETMASK, &set, NULL);
#endif
    sigdelset(&set, SIG_IPI);
    if (kvm_immediate_exit) {
        r = pthread_sigmask(SIG_SETMASK, &set, NULL);
    } else {
        r = kvm_set_signal_mask(cpu, &set);
    }
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

/* Called asynchronously in VCPU thread.  */
int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
#ifdef KVM_HAVE_MCE_INJECTION
    if (have_sigbus_pending) {
        return 1;
    }
    have_sigbus_pending = true;
    pending_sigbus_addr = addr;
    pending_sigbus_code = code;
    atomic_set(&cpu->exit_request, 1);
    return 0;
#else
    return 1;
#endif
}

/* Called synchronously (via signalfd) in main thread.  */
int kvm_on_sigbus(int code, void *addr)
{
#ifdef KVM_HAVE_MCE_INJECTION
    /* Action required MCE kills the process if SIGBUS is blocked. Because
     * that's what happens in the I/O thread, where we handle MCE via signalfd,
     * we can only get action optional here.
     */
    assert(code != BUS_MCEERR_AR);
    kvm_arch_on_sigbus_vcpu(first_cpu, code, addr);
    return 0;
#else
    return 1;
#endif
}
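
/* Create (or merely test for) an in-kernel device via KVM_CREATE_DEVICE. */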
int kvm_create_device(KVMState *s, uint64_t type, bool test)
{
    int ret;
    struct kvm_create_device create_dev;

    create_dev.type = type;
    create_dev.fd = -1;
    create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;

    if (!kvm_check_extension(s, KVM_CAP_DEVICE_CTRL)) {
        return -ENOTSUP;
    }

    ret = kvm_vm_ioctl(s, KVM_CREATE_DEVICE, &create_dev);
    if (ret) {
        return ret;
    }

    return test ? 0 : create_dev.fd;
}

bool kvm_device_supported(int vmfd, uint64_t type)
{
    struct kvm_create_device create_dev = {
        .type = type,
        .fd = -1,
        .flags = KVM_CREATE_DEVICE_TEST,
    };

    if (ioctl(vmfd, KVM_CHECK_EXTENSION, KVM_CAP_DEVICE_CTRL) <= 0) {
        return false;
    }

    return (ioctl(vmfd, KVM_CREATE_DEVICE, &create_dev) >= 0);
}
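
/* ONE_REG accessors: read or write a single register identified by its 64-bit id. */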
int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source)
{
    struct kvm_one_reg reg;
    int r;

    reg.id = id;
    reg.addr = (uintptr_t) source;
    r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (r) {
        trace_kvm_failed_reg_set(id, strerror(-r));
    }
    return r;
}

int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
{
    struct kvm_one_reg reg;
    int r;

    reg.id = id;
    reg.addr = (uintptr_t) target;
    r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (r) {
        trace_kvm_failed_reg_get(id, strerror(-r));
    }
    return r;
}
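
/* QOM boilerplate registering KVM as an accelerator type. */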
static void kvm_accel_class_init(ObjectClass *oc, void *data)
{
    AccelClass *ac = ACCEL_CLASS(oc);
    ac->name = "KVM";
    ac->init_machine = kvm_init;
    ac->allowed = &kvm_allowed;
}

static const TypeInfo kvm_accel_type = {
    .name = TYPE_KVM_ACCEL,
    .parent = TYPE_ACCEL,
    .class_init = kvm_accel_class_init,
    .instance_size = sizeof(KVMState),
};

static void kvm_type_init(void)
{
    type_register_static(&kvm_accel_type);
}

type_init(kvm_type_init);