xen_evtchn.c
/*
 * QEMU Xen emulation: Event channel support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "monitor/monitor.h"
#include "monitor/hmp.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc-target.h"
#include "qobject/qdict.h"
#include "qom/object.h"
#include "exec/target_page.h"
#include "exec/address-spaces.h"
#include "migration/vmstate.h"
#include "trace.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/i386/x86.h"
#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/irq.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_evtchn.h"
#include "xen_overlay.h"
#include "xen_xenstore.h"

#include "system/kvm.h"
#include "system/kvm_xen.h"
#include <linux/kvm.h>
#include <sys/eventfd.h>

#include "hw/xen/interface/memory.h"
#include "hw/xen/interface/hvm/params.h"

/* XXX: For kvm_update_msi_routes_all() */
#include "target/i386/kvm/kvm_i386.h"
#define TYPE_XEN_EVTCHN "xen-evtchn"
OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)

typedef struct XenEvtchnPort {
    uint32_t vcpu;      /* Xen/ACPI vcpu_id */
    uint16_t type;      /* EVTCHNSTAT_xxxx */
    union {
        uint16_t val;   /* raw value for serialization etc. */
        uint16_t pirq;
        uint16_t virq;
        struct {
            uint16_t port:15;
            uint16_t to_qemu:1; /* Only two targets; qemu or loopback */
        } interdomain;
    } u;
} XenEvtchnPort;

/* 32-bit compatibility definitions, also used natively in 32-bit build */
struct compat_arch_vcpu_info {
    unsigned int cr2;
    unsigned int pad[5];
};

struct compat_vcpu_info {
    uint8_t evtchn_upcall_pending;
    uint8_t evtchn_upcall_mask;
    uint16_t pad;
    uint32_t evtchn_pending_sel;
    struct compat_arch_vcpu_info arch;
    struct vcpu_time_info time;
}; /* 64 bytes (x86) */

struct compat_arch_shared_info {
    unsigned int max_pfn;
    unsigned int pfn_to_mfn_frame_list_list;
    unsigned int nmi_reason;
    unsigned int p2m_cr3;
    unsigned int p2m_vaddr;
    unsigned int p2m_generation;
    uint32_t wc_sec_hi;
};

struct compat_shared_info {
    struct compat_vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];
    uint32_t evtchn_pending[32];
    uint32_t evtchn_mask[32];
    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
    uint32_t wc_sec;
    uint32_t wc_nsec;
    struct compat_arch_shared_info arch;
};

#define COMPAT_EVTCHN_2L_NR_CHANNELS 1024
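/*
 * Editor's note: a 64-bit guest uses the native struct shared_info layout,
 * where evtchn_pending[]/evtchn_mask[] are arrays of 64-bit words and up to
 * EVTCHN_2L_NR_CHANNELS (4096) ports exist, while a 32-bit guest uses the
 * compat layout above with 32-bit words and only 1024 ports. This is why
 * valid_port() and the do_*_port_lm()/do_*_port_compat() helpers below are
 * split on xen_is_long_mode().
 */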
/* Local private implementation of struct xenevtchn_handle */
struct xenevtchn_handle {
    evtchn_port_t be_port;
    evtchn_port_t guest_port; /* Or zero for unbound */
    int fd;
};

/*
 * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
 * insane enough to think about guest-transparent live migration from actual
 * Xen to QEMU, and ensuring that we can convert/consume the stream.
 */
#define IRQ_UNBOUND -1
#define IRQ_PT -2
#define IRQ_MSI_EMU -3

struct pirq_info {
    int gsi;
    uint16_t port;
    PCIDevice *dev;
    int vector;
    bool is_msix;
    bool is_masked;
    bool is_translated;
};

struct XenEvtchnState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/
    uint64_t callback_param;
    bool evtchn_in_kernel;
    bool setting_callback_gsi;
    int extern_gsi_level;
    uint32_t callback_gsi;

    QEMUBH *gsi_bh;

    QemuMutex port_lock;
    uint32_t nr_ports;
    XenEvtchnPort port_table[EVTCHN_2L_NR_CHANNELS];

    /* Connected to the system GSIs for raising callback as GSI / INTx */
    unsigned int nr_callback_gsis;
    qemu_irq *callback_gsis;

    struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];

    uint32_t nr_pirqs;

    /* Bitmap of allocated PIRQs (serialized) */
    uint16_t nr_pirq_inuse_words;
    uint64_t *pirq_inuse_bitmap;

    /* GSI → PIRQ mapping (serialized) */
    uint16_t gsi_pirq[IOAPIC_NUM_PINS];
    /* Per-GSI assertion state (serialized) */
    uint32_t pirq_gsi_set;

    /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
    struct pirq_info *pirq;
};

#define pirq_inuse_word(s, pirq) (s->pirq_inuse_bitmap[((pirq) / 64)])
#define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))

#define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
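/*
 * Illustration (editor's addition, not part of the original code): the bitmap
 * packs 64 PIRQs per uint64_t word, so marking PIRQ n as allocated and then
 * testing it look like:
 *
 *     pirq_inuse_word(s, n) |= pirq_inuse_bit(n);   // as in allocate_pirq()
 *     if (pirq_inuse(s, n)) { ... }                 // test
 *
 * and freeing a PIRQ would clear the bit with &= ~pirq_inuse_bit(n).
 */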
struct XenEvtchnState *xen_evtchn_singleton;

/* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
#define CALLBACK_VIA_TYPE_SHIFT 56

static void unbind_backend_ports(XenEvtchnState *s);

static int xen_evtchn_pre_load(void *opaque)
{
    XenEvtchnState *s = opaque;

    /* Unbind all the backend-side ports; they need to rebind */
    unbind_backend_ports(s);

    /* It'll be leaked otherwise. */
    g_free(s->pirq_inuse_bitmap);
    s->pirq_inuse_bitmap = NULL;

    return 0;
}

static int xen_evtchn_post_load(void *opaque, int version_id)
{
    XenEvtchnState *s = opaque;
    uint32_t i;

    if (s->callback_param) {
        xen_evtchn_set_callback_param(s->callback_param);
    }

    /* Rebuild s->pirq[].port mapping */
    for (i = 0; i < s->nr_ports; i++) {
        XenEvtchnPort *p = &s->port_table[i];

        if (p->type == EVTCHNSTAT_pirq) {
            assert(p->u.pirq);
            assert(p->u.pirq < s->nr_pirqs);

            /*
             * Set the gsi to IRQ_UNBOUND; it may be changed to an actual
             * GSI# below, or to IRQ_MSI_EMU when the MSI table snooping
             * catches up with it.
             */
            s->pirq[p->u.pirq].gsi = IRQ_UNBOUND;
            s->pirq[p->u.pirq].port = i;
        }
    }
    /* Rebuild s->pirq[].gsi mapping */
    for (i = 0; i < IOAPIC_NUM_PINS; i++) {
        if (s->gsi_pirq[i]) {
            s->pirq[s->gsi_pirq[i]].gsi = i;
        }
    }

    return 0;
}
static bool xen_evtchn_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static const VMStateDescription xen_evtchn_port_vmstate = {
    .name = "xen_evtchn_port",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT32(vcpu, XenEvtchnPort),
        VMSTATE_UINT16(type, XenEvtchnPort),
        VMSTATE_UINT16(u.val, XenEvtchnPort),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription xen_evtchn_vmstate = {
    .name = "xen_evtchn",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_evtchn_is_needed,
    .pre_load = xen_evtchn_pre_load,
    .post_load = xen_evtchn_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT64(callback_param, XenEvtchnState),
        VMSTATE_UINT32(nr_ports, XenEvtchnState),
        VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
                                     xen_evtchn_port_vmstate, XenEvtchnPort),
        VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, IOAPIC_NUM_PINS),
        VMSTATE_VARRAY_UINT16_ALLOC(pirq_inuse_bitmap, XenEvtchnState,
                                    nr_pirq_inuse_words, 0,
                                    vmstate_info_uint64, uint64_t),
        VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_evtchn_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->vmsd = &xen_evtchn_vmstate;
}

static const TypeInfo xen_evtchn_info = {
    .name          = TYPE_XEN_EVTCHN,
    .parent        = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenEvtchnState),
    .class_init    = xen_evtchn_class_init,
};

static struct evtchn_backend_ops emu_evtchn_backend_ops = {
    .open = xen_be_evtchn_open,
    .bind_interdomain = xen_be_evtchn_bind_interdomain,
    .unbind = xen_be_evtchn_unbind,
    .close = xen_be_evtchn_close,
    .get_fd = xen_be_evtchn_fd,
    .notify = xen_be_evtchn_notify,
    .unmask = xen_be_evtchn_unmask,
    .pending = xen_be_evtchn_pending,
};

static void gsi_assert_bh(void *opaque)
{
    struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
    if (vi) {
        xen_evtchn_set_callback_level(!!vi->evtchn_upcall_pending);
    }
}
void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
{
    XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
                                                        -1, NULL));
    int i;

    xen_evtchn_singleton = s;

    qemu_mutex_init(&s->port_lock);
    s->gsi_bh = aio_bh_new(qemu_get_aio_context(), gsi_assert_bh, s);

    /*
     * These are the *output* GSI from event channel support, for
     * signalling CPU0's events via GSI or PCI INTx instead of the
     * per-CPU vector. We create a *set* of irqs and connect one to
     * each of the system GSIs which were passed in from the platform
     * code, and then just trigger the right one as appropriate from
     * xen_evtchn_set_callback_level().
     */
    s->nr_callback_gsis = nr_gsis;
    s->callback_gsis = g_new0(qemu_irq, nr_gsis);
    for (i = 0; i < nr_gsis; i++) {
        sysbus_init_irq(SYS_BUS_DEVICE(s), &s->callback_gsis[i]);
        sysbus_connect_irq(SYS_BUS_DEVICE(s), i, system_gsis[i]);
    }

    /*
     * The Xen scheme for encoding PIRQ# into an MSI message is not
     * compatible with 32-bit MSI, as it puts the high bits of the
     * PIRQ# into the high bits of the MSI message address, instead of
     * using the Extended Destination ID in address bits 4-11 which
     * perhaps would have been a better choice.
     *
     * To keep life simple, kvm_accel_instance_init() initialises the
     * default to 256, which conveniently doesn't need to set anything
     * outside the low 32 bits of the address. It can be increased by
     * setting the xen-evtchn-max-pirq property.
     */
    s->nr_pirqs = kvm_xen_get_evtchn_max_pirq();

    s->nr_pirq_inuse_words = DIV_ROUND_UP(s->nr_pirqs, 64);
    s->pirq_inuse_bitmap = g_new0(uint64_t, s->nr_pirq_inuse_words);
    s->pirq = g_new0(struct pirq_info, s->nr_pirqs);

    /* Set event channel functions for backend drivers to use */
    xen_evtchn_ops = &emu_evtchn_backend_ops;
}

static void xen_evtchn_register_types(void)
{
    type_register_static(&xen_evtchn_info);
}

type_init(xen_evtchn_register_types)
static int set_callback_pci_intx(XenEvtchnState *s, uint64_t param)
{
    PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
    uint8_t pin = param & 3;
    uint8_t devfn = (param >> 8) & 0xff;
    uint16_t bus = (param >> 16) & 0xffff;
    uint16_t domain = (param >> 32) & 0xffff;
    PCIDevice *pdev;
    PCIINTxRoute r;

    if (domain || !pcms) {
        return 0;
    }

    pdev = pci_find_device(pcms->pcibus, bus, devfn);
    if (!pdev) {
        return 0;
    }

    r = pci_device_route_intx_to_irq(pdev, pin);
    if (r.mode != PCI_INTX_ENABLED) {
        return 0;
    }

    /*
     * Hm, can we be notified of INTX routing changes? Not without
     * *owning* the device and being allowed to overwrite its own
     * ->intx_routing_notifier, AFAICT. So let's not.
     */
    return r.irq;
}
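/*
 * Illustration (editor's addition): for HVM_PARAM_CALLBACK_TYPE_PCI_INTX the
 * low bits of the param encode the target exactly as decoded above:
 *   bits  0-1   INTx pin (0 = INTA)
 *   bits  8-15  devfn
 *   bits 16-31  bus
 *   bits 32-47  PCI domain (must be 0 here)
 * e.g. param 0x0000000000001001 selects bus 0, devfn 0x10 (slot 2, function
 * 0), pin INTB, and the function returns the GSI that INTx pin routes to.
 */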
void xen_evtchn_set_callback_level(int level)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    if (!s) {
        return;
    }

    /*
     * We get to this function in a number of ways:
     *
     *  • From I/O context, via PV backend drivers sending a notification to
     *    the guest.
     *
     *  • From guest vCPU context, via loopback interdomain event channels
     *    (or theoretically even IPIs but guests don't use those with GSI
     *    delivery because that's pointless. We don't want a malicious guest
     *    to be able to trigger a deadlock though, so we can't rule it out.)
     *
     *  • From guest vCPU context when the HVM_PARAM_CALLBACK_IRQ is being
     *    configured.
     *
     *  • From guest vCPU context in the KVM exit handler, if the upcall
     *    pending flag has been cleared and the GSI needs to be deasserted.
     *
     *  • Maybe in future, in an interrupt ack/eoi notifier when the GSI has
     *    been acked in the irqchip.
     *
     * Whichever context we come from, if we aren't already holding the BQL
     * then we can't take it now, as we may already hold s->port_lock. So
     * trigger the BH to set the IRQ for us instead of doing it immediately.
     *
     * In the HVM_PARAM_CALLBACK_IRQ and KVM exit handler cases, the caller
     * will deliberately take the BQL because they want the change to take
     * effect immediately. That just leaves interdomain loopback as the case
     * which uses the BH.
     */
    if (!bql_locked()) {
        qemu_bh_schedule(s->gsi_bh);
        return;
    }

    if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
        /*
         * Ugly, but since we hold the BQL we can set this flag so that
         * xen_evtchn_set_gsi() can tell the difference between this code
         * setting the GSI, and an external device (PCI INTx) doing so.
         */
        s->setting_callback_gsi = true;

        /* Do not deassert the line if an external device is asserting it. */
        qemu_set_irq(s->callback_gsis[s->callback_gsi],
                     level || s->extern_gsi_level);
        s->setting_callback_gsi = false;

        /*
         * If the callback GSI is the only one asserted, ensure the status
         * is polled for deassertion in kvm_arch_post_run().
         */
        if (level && !s->extern_gsi_level) {
            kvm_xen_set_callback_asserted();
        }
    }
}
int xen_evtchn_set_callback_param(uint64_t param)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    struct kvm_xen_hvm_attr xa = {
        .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
        .u.vector = 0,
    };
    bool in_kernel = false;
    uint32_t gsi = 0;
    int type = param >> CALLBACK_VIA_TYPE_SHIFT;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    /*
     * We need the BQL because set_callback_pci_intx() may call into PCI code,
     * and because we may need to manipulate the old and new GSI levels.
     */
    assert(bql_locked());
    qemu_mutex_lock(&s->port_lock);

    switch (type) {
    case HVM_PARAM_CALLBACK_TYPE_VECTOR: {
        xa.u.vector = (uint8_t)param,

        ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
        if (!ret && kvm_xen_has_cap(EVTCHN_SEND)) {
            in_kernel = true;
        }
        gsi = 0;
        break;
    }

    case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
        gsi = set_callback_pci_intx(s, param);
        ret = gsi ? 0 : -EINVAL;
        break;

    case HVM_PARAM_CALLBACK_TYPE_GSI:
        gsi = (uint32_t)param;
        ret = 0;
        break;

    default:
        /* Xen doesn't return error even if you set something bogus */
        ret = 0;
        break;
    }

    /* If the guest has set a per-vCPU callback vector, prefer that. */
    if (gsi && kvm_xen_has_vcpu_callback_vector()) {
        in_kernel = kvm_xen_has_cap(EVTCHN_SEND);
        gsi = 0;
    }

    if (!ret) {
        /* If vector delivery was turned *off* then tell the kernel */
        if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) ==
            HVM_PARAM_CALLBACK_TYPE_VECTOR && !xa.u.vector) {
            kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
        }
        s->callback_param = param;
        s->evtchn_in_kernel = in_kernel;

        if (gsi != s->callback_gsi) {
            struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);

            xen_evtchn_set_callback_level(0);
            s->callback_gsi = gsi;

            if (gsi && vi && vi->evtchn_upcall_pending) {
                kvm_xen_inject_vcpu_callback_vector(0, type);
            }
        }
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}

static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
{
    int type = s->callback_param >> CALLBACK_VIA_TYPE_SHIFT;

    kvm_xen_inject_vcpu_callback_vector(vcpu, type);
}
static void deassign_kernel_port(evtchn_port_t port)
{
    struct kvm_xen_hvm_attr ha;
    int ret;

    ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
    ha.u.evtchn.send_port = port;
    ha.u.evtchn.flags = KVM_XEN_EVTCHN_DEASSIGN;

    ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
    if (ret) {
        qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
                      port, strerror(ret));
    }
}

static int assign_kernel_port(uint16_t type, evtchn_port_t port,
                              uint32_t vcpu_id)
{
    CPUState *cpu = qemu_get_cpu(vcpu_id);
    struct kvm_xen_hvm_attr ha;

    if (!cpu) {
        return -ENOENT;
    }

    ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
    ha.u.evtchn.send_port = port;
    ha.u.evtchn.type = type;
    ha.u.evtchn.flags = 0;
    ha.u.evtchn.deliver.port.port = port;
    ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
    ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
}

static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
{
    struct kvm_xen_hvm_attr ha;

    ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
    ha.u.evtchn.send_port = port;
    ha.u.evtchn.type = type;
    ha.u.evtchn.flags = 0;
    ha.u.evtchn.deliver.eventfd.port = 0;
    ha.u.evtchn.deliver.eventfd.fd = fd;

    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
}
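/*
 * Editor's note (my reading of the two helpers above, not original text):
 * both program the same KVM_XEN_ATTR_TYPE_EVTCHN attribute for 'send_port'
 * but with different delivery targets. assign_kernel_port() asks the kernel
 * to deliver events on that port straight into the target vCPU's 2-level
 * pending bitmap, while assign_kernel_eventfd() asks it to signal an eventfd
 * instead, which is how the QEMU backend (driver-domain) side of an
 * interdomain channel gets woken when the guest's EVTCHNOP_send is handled
 * in the kernel.
 */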
static bool valid_port(evtchn_port_t port)
{
    if (!port) {
        return false;
    }

    if (xen_is_long_mode()) {
        return port < EVTCHN_2L_NR_CHANNELS;
    } else {
        return port < COMPAT_EVTCHN_2L_NR_CHANNELS;
    }
}

static bool valid_vcpu(uint32_t vcpu)
{
    return !!qemu_get_cpu(vcpu);
}

static void unbind_backend_ports(XenEvtchnState *s)
{
    XenEvtchnPort *p;
    int i;

    for (i = 1; i < s->nr_ports; i++) {
        p = &s->port_table[i];
        if (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu) {
            evtchn_port_t be_port = p->u.interdomain.port;

            if (s->be_handles[be_port]) {
                /* This part will be overwritten on the load anyway. */
                p->type = EVTCHNSTAT_unbound;
                p->u.interdomain.port = 0;

                /* Leave the backend port open and unbound too. */
                if (kvm_xen_has_cap(EVTCHN_SEND)) {
                    deassign_kernel_port(i);
                }
                s->be_handles[be_port]->guest_port = 0;
            }
        }
    }
}
int xen_evtchn_status_op(struct evtchn_status *status)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;

    if (!s) {
        return -ENOTSUP;
    }

    if (status->dom != DOMID_SELF && status->dom != xen_domid) {
        return -ESRCH;
    }

    if (!valid_port(status->port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[status->port];

    status->status = p->type;
    status->vcpu = p->vcpu;

    switch (p->type) {
    case EVTCHNSTAT_unbound:
        status->u.unbound.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
                                                         : xen_domid;
        break;

    case EVTCHNSTAT_interdomain:
        status->u.interdomain.dom = p->u.interdomain.to_qemu ? DOMID_QEMU
                                                             : xen_domid;
        status->u.interdomain.port = p->u.interdomain.port;
        break;

    case EVTCHNSTAT_pirq:
        status->u.pirq = p->u.pirq;
        break;

    case EVTCHNSTAT_virq:
        status->u.virq = p->u.virq;
        break;
    }

    qemu_mutex_unlock(&s->port_lock);
    return 0;
}
/*
 * Never thought I'd hear myself say this, but C++ templates would be
 * kind of nice here.
 *
 * template<class T> static int do_unmask_port(T *shinfo, ...);
 */
static int do_unmask_port_lm(XenEvtchnState *s, evtchn_port_t port,
                             bool do_unmask, struct shared_info *shinfo,
                             struct vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    if (do_unmask) {
        /*
         * If this is a true unmask operation, clear the mask bit. If
         * it was already unmasked, we have nothing further to do.
         */
        if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
            return 0;
        }
    } else {
        /*
         * This is a pseudo-unmask for affinity changes. We don't
         * change the mask bit, and if it's *masked* we have nothing
         * else to do.
         */
        if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
            return 0;
        }
    }

    /* If the event was not pending, we're done. */
    if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
static int do_unmask_port_compat(XenEvtchnState *s, evtchn_port_t port,
                                 bool do_unmask,
                                 struct compat_shared_info *shinfo,
                                 struct compat_vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    if (do_unmask) {
        /*
         * If this is a true unmask operation, clear the mask bit. If
         * it was already unmasked, we have nothing further to do.
         */
        if (!((qatomic_fetch_and(&shinfo->evtchn_mask[idx], ~mask) & mask))) {
            return 0;
        }
    } else {
        /*
         * This is a pseudo-unmask for affinity changes. We don't
         * change the mask bit, and if it's *masked* we have nothing
         * else to do.
         */
        if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
            return 0;
        }
    }

    /* If the event was not pending, we're done. */
    if (!(qatomic_fetch_or(&shinfo->evtchn_pending[idx], 0) & mask)) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}

static int unmask_port(XenEvtchnState *s, evtchn_port_t port, bool do_unmask)
{
    void *vcpu_info, *shinfo;

    if (s->port_table[port].type == EVTCHNSTAT_closed) {
        return -EINVAL;
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        return -ENOTSUP;
    }

    vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
    if (!vcpu_info) {
        return -EINVAL;
    }

    if (xen_is_long_mode()) {
        return do_unmask_port_lm(s, port, do_unmask, shinfo, vcpu_info);
    } else {
        return do_unmask_port_compat(s, port, do_unmask, shinfo, vcpu_info);
    }
}
static int do_set_port_lm(XenEvtchnState *s, evtchn_port_t port,
                          struct shared_info *shinfo,
                          struct vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    /* Update the pending bit itself. If it was already set, we're done. */
    if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
        return 0;
    }

    /* Check if it's masked. */
    if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}
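/*
 * Editor's summary of the 2-level scheme implemented above: raising a port
 * walks three levels and returns early if the port is masked or if a bit at
 * any level was already set (the guest has not consumed the previous
 * notification yet):
 *   1. the per-port bit in shinfo->evtchn_pending[],
 *   2. the per-word selector bit in vcpu_info->evtchn_pending_sel,
 *   3. vcpu_info->evtchn_upcall_pending.
 * Only when all three transition from 0 to 1 does inject_callback() actually
 * kick the target vCPU.
 */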
static int do_set_port_compat(XenEvtchnState *s, evtchn_port_t port,
                              struct compat_shared_info *shinfo,
                              struct compat_vcpu_info *vcpu_info)
{
    const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
    typeof(shinfo->evtchn_pending[0]) mask;
    int idx = port / bits_per_word;
    int offset = port % bits_per_word;

    mask = 1UL << offset;

    if (idx >= bits_per_word) {
        return -EINVAL;
    }

    /* Update the pending bit itself. If it was already set, we're done. */
    if (qatomic_fetch_or(&shinfo->evtchn_pending[idx], mask) & mask) {
        return 0;
    }

    /* Check if it's masked. */
    if (qatomic_fetch_or(&shinfo->evtchn_mask[idx], 0) & mask) {
        return 0;
    }

    /* Now on to the vcpu_info evtchn_pending_sel index... */
    mask = 1UL << idx;

    /* If a port in this word was already pending for this vCPU, all done. */
    if (qatomic_fetch_or(&vcpu_info->evtchn_pending_sel, mask) & mask) {
        return 0;
    }

    /* Set evtchn_upcall_pending for this vCPU */
    if (qatomic_fetch_or(&vcpu_info->evtchn_upcall_pending, 1)) {
        return 0;
    }

    inject_callback(s, s->port_table[port].vcpu);

    return 0;
}

static int set_port_pending(XenEvtchnState *s, evtchn_port_t port)
{
    void *vcpu_info, *shinfo;

    if (s->port_table[port].type == EVTCHNSTAT_closed) {
        return -EINVAL;
    }

    if (s->evtchn_in_kernel) {
        XenEvtchnPort *p = &s->port_table[port];
        CPUState *cpu = qemu_get_cpu(p->vcpu);
        struct kvm_irq_routing_xen_evtchn evt;

        if (!cpu) {
            return 0;
        }

        evt.port = port;
        evt.vcpu = kvm_arch_vcpu_id(cpu);
        evt.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

        return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_EVTCHN_SEND, &evt);
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        return -ENOTSUP;
    }

    vcpu_info = kvm_xen_get_vcpu_info_hva(s->port_table[port].vcpu);
    if (!vcpu_info) {
        return -EINVAL;
    }

    if (xen_is_long_mode()) {
        return do_set_port_lm(s, port, shinfo, vcpu_info);
    } else {
        return do_set_port_compat(s, port, shinfo, vcpu_info);
    }
}
static int clear_port_pending(XenEvtchnState *s, evtchn_port_t port)
{
    void *p = xen_overlay_get_shinfo_ptr();

    if (!p) {
        return -ENOTSUP;
    }

    if (xen_is_long_mode()) {
        struct shared_info *shinfo = p;
        const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
        typeof(shinfo->evtchn_pending[0]) mask;
        int idx = port / bits_per_word;
        int offset = port % bits_per_word;

        mask = 1UL << offset;
        qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
    } else {
        struct compat_shared_info *shinfo = p;
        const int bits_per_word = BITS_PER_BYTE * sizeof(shinfo->evtchn_pending[0]);
        typeof(shinfo->evtchn_pending[0]) mask;
        int idx = port / bits_per_word;
        int offset = port % bits_per_word;

        mask = 1UL << offset;
        qatomic_fetch_and(&shinfo->evtchn_pending[idx], ~mask);
    }
    return 0;
}

static void free_port(XenEvtchnState *s, evtchn_port_t port)
{
    s->port_table[port].type = EVTCHNSTAT_closed;
    s->port_table[port].u.val = 0;
    s->port_table[port].vcpu = 0;

    if (s->nr_ports == port + 1) {
        do {
            s->nr_ports--;
        } while (s->nr_ports &&
                 s->port_table[s->nr_ports - 1].type == EVTCHNSTAT_closed);
    }

    /* Clear pending event to avoid unexpected behavior on re-bind. */
    clear_port_pending(s, port);
}

static int allocate_port(XenEvtchnState *s, uint32_t vcpu, uint16_t type,
                         uint16_t val, evtchn_port_t *port)
{
    evtchn_port_t p = 1;

    for (p = 1; valid_port(p); p++) {
        if (s->port_table[p].type == EVTCHNSTAT_closed) {
            s->port_table[p].vcpu = vcpu;
            s->port_table[p].type = type;
            s->port_table[p].u.val = val;

            *port = p;

            if (s->nr_ports < p + 1) {
                s->nr_ports = p + 1;
            }

            return 0;
        }
    }
    return -ENOSPC;
}
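/*
 * Editor's note: port 0 is never handed out; allocation scans upwards from
 * port 1 and takes the first EVTCHNSTAT_closed slot, which is why valid_port()
 * treats 0 as invalid. s->nr_ports tracks one past the highest port ever
 * allocated, bounding the loops in soft reset and the serialized port table,
 * and free_port() shrinks it again when the topmost ports are closed.
 */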
static bool virq_is_global(uint32_t virq)
{
    switch (virq) {
    case VIRQ_TIMER:
    case VIRQ_DEBUG:
    case VIRQ_XENOPROF:
    case VIRQ_XENPMU:
        return false;

    default:
        return true;
    }
}

static int close_port(XenEvtchnState *s, evtchn_port_t port,
                      bool *flush_kvm_routes)
{
    XenEvtchnPort *p = &s->port_table[port];

    /* Because it *might* be a PIRQ port */
    assert(bql_locked());

    switch (p->type) {
    case EVTCHNSTAT_closed:
        return -ENOENT;

    case EVTCHNSTAT_pirq:
        s->pirq[p->u.pirq].port = 0;
        if (s->pirq[p->u.pirq].is_translated) {
            *flush_kvm_routes = true;
        }
        break;

    case EVTCHNSTAT_virq:
        kvm_xen_set_vcpu_virq(virq_is_global(p->u.virq) ? 0 : p->vcpu,
                              p->u.virq, 0);
        break;

    case EVTCHNSTAT_ipi:
        if (s->evtchn_in_kernel) {
            deassign_kernel_port(port);
        }
        break;

    case EVTCHNSTAT_interdomain:
        if (p->u.interdomain.to_qemu) {
            uint16_t be_port = p->u.interdomain.port;
            struct xenevtchn_handle *xc = s->be_handles[be_port];
            if (xc) {
                if (kvm_xen_has_cap(EVTCHN_SEND)) {
                    deassign_kernel_port(port);
                }
                xc->guest_port = 0;
            }
        } else {
            /* Loopback interdomain */
            XenEvtchnPort *rp = &s->port_table[p->u.interdomain.port];
            if (!valid_port(p->u.interdomain.port) ||
                rp->u.interdomain.port != port ||
                rp->type != EVTCHNSTAT_interdomain) {
                error_report("Inconsistent state for interdomain unbind");
            } else {
                /* Set the other end back to unbound */
                rp->type = EVTCHNSTAT_unbound;
                rp->u.interdomain.port = 0;
            }
        }
        break;

    default:
        break;
    }

    free_port(s, port);
    return 0;
}
int xen_evtchn_soft_reset(void)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    bool flush_kvm_routes = false;
    int i;

    if (!s) {
        return -ENOTSUP;
    }

    assert(bql_locked());

    qemu_mutex_lock(&s->port_lock);

    for (i = 0; i < s->nr_ports; i++) {
        close_port(s, i, &flush_kvm_routes);
    }

    qemu_mutex_unlock(&s->port_lock);

    if (flush_kvm_routes) {
        kvm_update_msi_routes_all(NULL, true, 0, 0);
    }

    return 0;
}

int xen_evtchn_reset_op(struct evtchn_reset *reset)
{
    if (reset->dom != DOMID_SELF && reset->dom != xen_domid) {
        return -ESRCH;
    }

    BQL_LOCK_GUARD();
    return xen_evtchn_soft_reset();
}

int xen_evtchn_close_op(struct evtchn_close *close)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    bool flush_kvm_routes = false;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(close->port)) {
        return -EINVAL;
    }

    BQL_LOCK_GUARD();
    qemu_mutex_lock(&s->port_lock);

    ret = close_port(s, close->port, &flush_kvm_routes);

    qemu_mutex_unlock(&s->port_lock);

    if (flush_kvm_routes) {
        kvm_update_msi_routes_all(NULL, true, 0, 0);
    }

    return ret;
}

int xen_evtchn_unmask_op(struct evtchn_unmask *unmask)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(unmask->port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = unmask_port(s, unmask->port, true);

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;
    int ret = -EINVAL;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(vcpu->port)) {
        return -EINVAL;
    }

    if (!valid_vcpu(vcpu->vcpu)) {
        return -ENOENT;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[vcpu->port];

    if (p->type == EVTCHNSTAT_interdomain ||
        p->type == EVTCHNSTAT_unbound ||
        p->type == EVTCHNSTAT_pirq ||
        (p->type == EVTCHNSTAT_virq && virq_is_global(p->u.virq))) {
        /*
         * unmask_port() with do_unmask==false will just raise the event
         * on the new vCPU if the port was already pending.
         */
        p->vcpu = vcpu->vcpu;
        unmask_port(s, vcpu->port, false);
        ret = 0;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}

int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (virq->virq >= NR_VIRQS) {
        return -EINVAL;
    }

    /* Global VIRQ must be allocated on vCPU0 first */
    if (virq_is_global(virq->virq) && virq->vcpu != 0) {
        return -EINVAL;
    }

    if (!valid_vcpu(virq->vcpu)) {
        return -ENOENT;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = allocate_port(s, virq->vcpu, EVTCHNSTAT_virq, virq->virq,
                        &virq->port);
    if (!ret) {
        ret = kvm_xen_set_vcpu_virq(virq->vcpu, virq->virq, virq->port);
        if (ret) {
            free_port(s, virq->port);
        }
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (pirq->pirq >= s->nr_pirqs) {
        return -EINVAL;
    }

    BQL_LOCK_GUARD();

    if (s->pirq[pirq->pirq].port) {
        return -EBUSY;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
                        &pirq->port);
    if (ret) {
        qemu_mutex_unlock(&s->port_lock);
        return ret;
    }

    s->pirq[pirq->pirq].port = pirq->port;
    trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);

    qemu_mutex_unlock(&s->port_lock);

    /*
     * Need to do the unmask outside port_lock because it may call
     * back into the MSI translate function.
     */
    if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
        if (s->pirq[pirq->pirq].is_masked) {
            PCIDevice *dev = s->pirq[pirq->pirq].dev;
            int vector = s->pirq[pirq->pirq].vector;
            char *dev_path = qdev_get_dev_path(DEVICE(dev));

            trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
            g_free(dev_path);

            if (s->pirq[pirq->pirq].is_msix) {
                msix_set_mask(dev, vector, false);
            } else {
                msi_set_mask(dev, vector, false, NULL);
            }
        } else if (s->pirq[pirq->pirq].is_translated) {
            /*
             * If KVM had attempted to translate this one before, make it try
             * again. If we unmasked, then the notifier on the MSI(-X) vector
             * will already have had the same effect.
             */
            kvm_update_msi_routes_all(NULL, true, 0, 0);
        }
    }

    return ret;
}

int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_vcpu(ipi->vcpu)) {
        return -ENOENT;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = allocate_port(s, ipi->vcpu, EVTCHNSTAT_ipi, 0, &ipi->port);
    if (!ret && s->evtchn_in_kernel) {
        assign_kernel_port(EVTCHNSTAT_ipi, ipi->port, ipi->vcpu);
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (interdomain->remote_dom != DOMID_QEMU &&
        interdomain->remote_dom != DOMID_SELF &&
        interdomain->remote_dom != xen_domid) {
        return -ESRCH;
    }

    if (!valid_port(interdomain->remote_port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    /* The newly allocated port starts out as unbound */
    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &interdomain->local_port);
    if (ret) {
        goto out;
    }

    if (interdomain->remote_dom == DOMID_QEMU) {
        struct xenevtchn_handle *xc = s->be_handles[interdomain->remote_port];
        XenEvtchnPort *lp = &s->port_table[interdomain->local_port];

        if (!xc) {
            ret = -ENOENT;
            goto out_free_port;
        }

        if (xc->guest_port) {
            ret = -EBUSY;
            goto out_free_port;
        }

        assert(xc->be_port == interdomain->remote_port);
        xc->guest_port = interdomain->local_port;
        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            assign_kernel_eventfd(lp->type, xc->guest_port, xc->fd);
        }
        lp->type = EVTCHNSTAT_interdomain;
        lp->u.interdomain.to_qemu = 1;
        lp->u.interdomain.port = interdomain->remote_port;
        ret = 0;
    } else {
        /* Loopback */
        XenEvtchnPort *rp = &s->port_table[interdomain->remote_port];
        XenEvtchnPort *lp = &s->port_table[interdomain->local_port];

        /*
         * The 'remote' port for loopback must be an unbound port allocated
         * for communication with the local domain, and must *not* be the
         * port that was just allocated for the local end.
         */
        if (interdomain->local_port != interdomain->remote_port &&
            rp->type == EVTCHNSTAT_unbound && !rp->u.interdomain.to_qemu) {

            rp->type = EVTCHNSTAT_interdomain;
            rp->u.interdomain.port = interdomain->local_port;

            lp->type = EVTCHNSTAT_interdomain;
            lp->u.interdomain.port = interdomain->remote_port;
        } else {
            ret = -EINVAL;
        }
    }

 out_free_port:
    if (ret) {
        free_port(s, interdomain->local_port);
    }
 out:
    qemu_mutex_unlock(&s->port_lock);

    return ret;
}

int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }

    if (alloc->dom != DOMID_SELF && alloc->dom != xen_domid) {
        return -ESRCH;
    }

    if (alloc->remote_dom != DOMID_QEMU &&
        alloc->remote_dom != DOMID_SELF &&
        alloc->remote_dom != xen_domid) {
        return -EPERM;
    }

    qemu_mutex_lock(&s->port_lock);

    ret = allocate_port(s, 0, EVTCHNSTAT_unbound, 0, &alloc->port);

    if (!ret && alloc->remote_dom == DOMID_QEMU) {
        XenEvtchnPort *p = &s->port_table[alloc->port];

        p->u.interdomain.to_qemu = 1;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
int xen_evtchn_send_op(struct evtchn_send *send)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;
    int ret = 0;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(send->port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[send->port];

    switch (p->type) {
    case EVTCHNSTAT_interdomain:
        if (p->u.interdomain.to_qemu) {
            /*
             * This is an event from the guest to qemu itself, which is
             * serving as the driver domain.
             */
            uint16_t be_port = p->u.interdomain.port;
            struct xenevtchn_handle *xc = s->be_handles[be_port];
            if (xc) {
                eventfd_write(xc->fd, 1);
                ret = 0;
            } else {
                ret = -ENOENT;
            }
        } else {
            /* Loopback interdomain ports; just a complex IPI */
            set_port_pending(s, p->u.interdomain.port);
        }
        break;

    case EVTCHNSTAT_ipi:
        set_port_pending(s, send->port);
        break;

    case EVTCHNSTAT_unbound:
        /* Xen will silently drop these */
        break;

    default:
        ret = -EINVAL;
        break;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}

int xen_evtchn_set_port(uint16_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *p;
    int ret = -EINVAL;

    if (!s) {
        return -ENOTSUP;
    }

    if (!valid_port(port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    p = &s->port_table[port];

    /* QEMU has no business sending to anything but these */
    if (p->type == EVTCHNSTAT_virq ||
        (p->type == EVTCHNSTAT_interdomain && p->u.interdomain.to_qemu)) {
        set_port_pending(s, port);
        ret = 0;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}
static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
{
    uint16_t pirq;

    /*
     * Preserve the allocation strategy that Xen has. It looks like
     * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
     * to GSIs (counting up from 16), and then we count backwards from
     * the top for MSIs or when the GSI space is exhausted.
     */
    if (type == MAP_PIRQ_TYPE_GSI) {
        for (pirq = 16 ; pirq < IOAPIC_NUM_PINS; pirq++) {
            if (pirq_inuse(s, pirq)) {
                continue;
            }

            /* Found it */
            goto found;
        }
    }
    for (pirq = s->nr_pirqs - 1; pirq >= IOAPIC_NUM_PINS; pirq--) {
        /* Skip whole words at a time when they're full */
        if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
            pirq &= ~63ULL;
            continue;
        }
        if (pirq_inuse(s, pirq)) {
            continue;
        }

        goto found;
    }
    return -ENOSPC;

 found:
    pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
    if (gsi >= 0) {
        assert(gsi < IOAPIC_NUM_PINS);
        s->gsi_pirq[gsi] = pirq;
    }
    s->pirq[pirq].gsi = gsi;
    return pirq;
}
bool xen_evtchn_set_gsi(int gsi, int *level)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq;

    assert(bql_locked());

    if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
        return false;
    }

    /*
     * For the callback_gsi we need to implement a logical OR of the event
     * channel GSI and the external input (e.g. from PCI INTx), because
     * QEMU itself doesn't support shared level interrupts via demux or
     * resamplers.
     */
    if (gsi && gsi == s->callback_gsi) {
        /* Remember the external state of the GSI pin (e.g. from PCI INTx) */
        if (!s->setting_callback_gsi) {
            s->extern_gsi_level = *level;

            /*
             * Don't allow the external device to deassert the line if the
             * event channel GSI should still be asserted.
             */
            if (!s->extern_gsi_level) {
                struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
                if (vi && vi->evtchn_upcall_pending) {
                    /* Need to poll for deassertion */
                    kvm_xen_set_callback_asserted();
                    *level = 1;
                }
            }
        }

        /*
         * The event channel GSI cannot be routed to PIRQ, as that would make
         * no sense. It could also deadlock on s->port_lock, if we proceed.
         * So bail out now.
         */
        return false;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    pirq = s->gsi_pirq[gsi];
    if (!pirq) {
        return false;
    }

    if (*level) {
        int port = s->pirq[pirq].port;

        s->pirq_gsi_set |= (1U << gsi);
        if (port) {
            set_port_pending(s, port);
        }
    } else {
        s->pirq_gsi_set &= ~(1U << gsi);
    }
    return true;
}
static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
{
    /* The vector (in low 8 bits of data) must be zero */
    if (data & 0xff) {
        return 0;
    }

    uint32_t pirq = (addr & 0xff000) >> 12;
    pirq |= (addr >> 32) & 0xffffff00;

    return pirq;
}
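/*
 * Worked example (editor's addition): the vector byte of the MSI data must be
 * zero, the low 8 bits of the PIRQ# sit in address bits 12-19, and the
 * remaining PIRQ bits sit in address bits 40 and above. So a message with
 * data 0x0000 and addr 0x0000010000ab000 decoded here gives PIRQ 0x1ab:
 *   (addr & 0xff000) >> 12    = 0x0ab
 *   (addr >> 32) & 0xffffff00 = 0x100
 * which is also why the scheme cannot be expressed in a 32-bit MSI address
 * once the PIRQ# exceeds 255 (see the comment in xen_evtchn_create()).
 */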
  1371. static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
  1372. int except_pirq)
  1373. {
  1374. uint32_t pirq;
  1375. for (pirq = 0; pirq < s->nr_pirqs; pirq++) {
  1376. /*
  1377. * We could be cleverer here, but it isn't really a fast path, and
  1378. * this trivial optimisation is enough to let us skip the big gap
  1379. * in the middle a bit quicker (in terms of both loop iterations,
  1380. * and cache lines).
  1381. */
  1382. if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
  1383. pirq += 64;
  1384. continue;
  1385. }
  1386. if (except_pirq && pirq == except_pirq) {
  1387. continue;
  1388. }
  1389. if (s->pirq[pirq].dev != dev) {
  1390. continue;
  1391. }
  1392. if (vector != -1 && s->pirq[pirq].vector != vector) {
  1393. continue;
  1394. }
  1395. /* It could theoretically be bound to a port already, but that is OK. */
  1396. s->pirq[pirq].dev = dev;
  1397. s->pirq[pirq].gsi = IRQ_UNBOUND;
  1398. s->pirq[pirq].is_msix = false;
  1399. s->pirq[pirq].vector = 0;
  1400. s->pirq[pirq].is_masked = false;
  1401. s->pirq[pirq].is_translated = false;
  1402. }
  1403. }
void xen_evtchn_remove_pci_device(PCIDevice *dev)
{
    XenEvtchnState *s = xen_evtchn_singleton;

    if (!s) {
        return;
    }

    QEMU_LOCK_GUARD(&s->port_lock);
    do_remove_pci_vector(s, dev, -1, 0);
}

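/*
 * Snoop an MSI/MSI-X message being configured for a PCI device vector. If
 * the message encodes a PIRQ# which is allocated and currently unbound (or
 * already in MSI-emulation state), record the {device, vector, masked} tuple
 * against that PIRQ and mark it IRQ_MSI_EMU; any stale entries for the same
 * device/vector are then cleared.
 */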
void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
                          uint64_t addr, uint32_t data, bool is_masked)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint32_t pirq;

    if (!s) {
        return;
    }

    assert(bql_locked());

    pirq = msi_pirq_target(addr, data);

    /*
     * The PIRQ# must be sane, and there must be an allocated PIRQ in
     * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
     */
    if (!pirq || pirq >= s->nr_pirqs || !pirq_inuse(s, pirq) ||
        (s->pirq[pirq].gsi != IRQ_UNBOUND &&
         s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
        pirq = 0;
    }

    if (pirq) {
        s->pirq[pirq].dev = dev;
        s->pirq[pirq].gsi = IRQ_MSI_EMU;
        s->pirq[pirq].is_msix = is_msix;
        s->pirq[pirq].vector = vector;
        s->pirq[pirq].is_masked = is_masked;
    }

    /* Remove any (other) entries for this {device, vector} */
    do_remove_pci_vector(s, dev, vector, pirq);
}

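/*
 * Attempt to translate a PIRQ-encoded MSI into a KVM_IRQ_ROUTING_XEN_EVTCHN
 * routing entry targeting the event channel port bound to that PIRQ.
 * Returns 0 if the route was filled in, 1 if the message is not a PIRQ at
 * all (so normal MSI routing applies), or a negative errno on failure.
 */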
int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
                                  uint64_t address, uint32_t data)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint32_t pirq, port;
    CPUState *cpu;

    if (!s) {
        return 1; /* Not a PIRQ */
    }

    assert(bql_locked());

    pirq = msi_pirq_target(address, data);
    if (!pirq || pirq >= s->nr_pirqs) {
        return 1; /* Not a PIRQ */
    }

    if (!kvm_xen_has_cap(EVTCHN_2LEVEL)) {
        return -ENOTSUP;
    }

    if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
        return -EINVAL;
    }

    /* Remember that KVM tried to translate this. It might need to try again. */
    s->pirq[pirq].is_translated = true;

    QEMU_LOCK_GUARD(&s->port_lock);

    port = s->pirq[pirq].port;
    if (!valid_port(port)) {
        return -EINVAL;
    }

    cpu = qemu_get_cpu(s->port_table[port].vcpu);
    if (!cpu) {
        return -EINVAL;
    }

    route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
    route->u.xen_evtchn.port = port;
    route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
    route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

    return 0; /* Handled */
}

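/*
 * Deliver an MSI directly as an event channel if it encodes a PIRQ# which is
 * bound to a valid port. Returns true if the event was raised here, false
 * otherwise (e.g. so the caller can fall back to normal MSI delivery).
 */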
bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    uint32_t pirq, port;

    if (!s) {
        return false;
    }

    assert(bql_locked());

    pirq = msi_pirq_target(address, data);
    if (!pirq || pirq >= s->nr_pirqs) {
        return false;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    port = s->pirq[pirq].port;
    if (!valid_port(port)) {
        return false;
    }

    set_port_pending(s, port);
    return true;
}

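/*
 * PHYSDEVOP_map_pirq: map a GSI to a PIRQ, either allocating a new PIRQ#
 * (when map->pirq < 0) or validating a caller-chosen one. Only
 * MAP_PIRQ_TYPE_GSI is handled here.
 */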
int xen_physdev_map_pirq(struct physdev_map_pirq *map)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq = map->pirq;
    int gsi = map->index;

    if (!s) {
        return -ENOTSUP;
    }

    BQL_LOCK_GUARD();
    QEMU_LOCK_GUARD(&s->port_lock);

    if (map->domid != DOMID_SELF && map->domid != xen_domid) {
        return -EPERM;
    }
    if (map->type != MAP_PIRQ_TYPE_GSI) {
        return -EINVAL;
    }
    if (gsi < 0 || gsi >= IOAPIC_NUM_PINS) {
        return -EINVAL;
    }

    if (pirq < 0) {
        pirq = allocate_pirq(s, map->type, gsi);
        if (pirq < 0) {
            return pirq;
        }
        map->pirq = pirq;
    } else if (pirq > s->nr_pirqs) {
        return -EINVAL;
    } else {
        /*
         * User specified a valid-looking PIRQ#. Allow it if it is
         * allocated and not yet bound, or if it is unallocated
         */
        if (pirq_inuse(s, pirq)) {
            if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
                return -EBUSY;
            }
        } else {
            /* If it was unused, mark it used now. */
            pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
        }

        /* Set the mapping in both directions. */
        s->pirq[pirq].gsi = gsi;
        s->gsi_pirq[gsi] = pirq;
    }

    trace_kvm_xen_map_pirq(pirq, gsi);
    return 0;
}

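/*
 * PHYSDEVOP_unmap_pirq: tear down a GSI-to-PIRQ mapping and release the
 * PIRQ#. Only GSI PIRQs (gsi >= 0) can be unmapped here; anything else
 * returns -EINVAL.
 */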
int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq = unmap->pirq;
    int gsi;

    if (!s) {
        return -ENOTSUP;
    }

    if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid) {
        return -EPERM;
    }
    if (pirq < 0 || pirq >= s->nr_pirqs) {
        return -EINVAL;
    }

    BQL_LOCK_GUARD();
    qemu_mutex_lock(&s->port_lock);

    if (!pirq_inuse(s, pirq)) {
        qemu_mutex_unlock(&s->port_lock);
        return -ENOENT;
    }

    gsi = s->pirq[pirq].gsi;

    /* We can only unmap GSI PIRQs */
    if (gsi < 0) {
        qemu_mutex_unlock(&s->port_lock);
        return -EINVAL;
    }

    s->gsi_pirq[gsi] = 0;
    s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
    pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);

    trace_kvm_xen_unmap_pirq(pirq, gsi);
    qemu_mutex_unlock(&s->port_lock);

    if (gsi == IRQ_MSI_EMU) {
        kvm_update_msi_routes_all(NULL, true, 0, 0);
    }

    return 0;
}

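/*
 * PHYSDEVOP_eoi: end-of-interrupt for a PIRQ. If the corresponding GSI is
 * still asserted (level-triggered and not yet deasserted), re-raise the
 * bound event channel port so the guest sees it again.
 */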
int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq = eoi->irq;
    int gsi;

    if (!s) {
        return -ENOTSUP;
    }

    BQL_LOCK_GUARD();
    QEMU_LOCK_GUARD(&s->port_lock);

    if (!pirq_inuse(s, pirq)) {
        return -ENOENT;
    }

    gsi = s->pirq[pirq].gsi;
    if (gsi < 0) {
        return -EINVAL;
    }

    /* Reassert a level IRQ if needed */
    if (s->pirq_gsi_set & (1U << gsi)) {
        int port = s->pirq[pirq].port;
        if (port) {
            set_port_pending(s, port);
        }
    }

    return 0;
}

int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq = query->irq;

    if (!s) {
        return -ENOTSUP;
    }

    BQL_LOCK_GUARD();
    QEMU_LOCK_GUARD(&s->port_lock);

    if (!pirq_inuse(s, pirq)) {
        return -ENOENT;
    }

    if (s->pirq[pirq].gsi >= 0) {
        query->flags = XENIRQSTAT_needs_eoi;
    } else {
        query->flags = 0;
    }

    return 0;
}

int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int pirq;

    if (!s) {
        return -ENOTSUP;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
    if (pirq < 0) {
        return pirq;
    }

    get->pirq = pirq;
    trace_kvm_xen_get_free_pirq(pirq, get->type);
    return 0;
}

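/*
 * Backend (QEMU-internal) event channel support, broadly mirroring the
 * libxenevtchn handle API. Each handle is backed by an eventfd so that it
 * can be polled, and so that the eventfd can be attached to the guest port
 * in the kernel when the EVTCHN_SEND capability is available.
 */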
struct xenevtchn_handle *xen_be_evtchn_open(void)
{
    struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);

    xc->fd = eventfd(0, EFD_CLOEXEC);
    if (xc->fd < 0) {
        free(xc);
        return NULL;
    }

    return xc;
}

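/* Find a free backend port number and claim it for the given handle. */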
static int find_be_port(XenEvtchnState *s, struct xenevtchn_handle *xc)
{
    int i;

    for (i = 1; i < EVTCHN_2L_NR_CHANNELS; i++) {
        if (!s->be_handles[i]) {
            s->be_handles[i] = xc;
            xc->be_port = i;
            return i;
        }
    }
    return 0;
}

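/*
 * Bind a backend handle to an unbound guest port, or rebind to an existing
 * interdomain port (e.g. after migration), preserving the backend port
 * number where possible. On success the backend port number is returned
 * and, when EVTCHN_SEND is available, the handle's eventfd is assigned to
 * the guest port in the kernel.
 */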
int xen_be_evtchn_bind_interdomain(struct xenevtchn_handle *xc, uint32_t domid,
                                   evtchn_port_t guest_port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    XenEvtchnPort *gp;
    uint16_t be_port = 0;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }
    if (!xc) {
        return -EFAULT;
    }
    if (domid != xen_domid) {
        return -ESRCH;
    }
    if (!valid_port(guest_port)) {
        return -EINVAL;
    }

    qemu_mutex_lock(&s->port_lock);

    /* The guest has to have an unbound port waiting for us to bind */
    gp = &s->port_table[guest_port];

    switch (gp->type) {
    case EVTCHNSTAT_interdomain:
        /* Allow rebinding after migration, preserve port # if possible */
        be_port = gp->u.interdomain.port;
        assert(be_port != 0);
        if (!s->be_handles[be_port]) {
            s->be_handles[be_port] = xc;
            xc->guest_port = guest_port;
            ret = xc->be_port = be_port;
            if (kvm_xen_has_cap(EVTCHN_SEND)) {
                assign_kernel_eventfd(gp->type, guest_port, xc->fd);
            }
            break;
        }
        /* fall through */

    case EVTCHNSTAT_unbound:
        be_port = find_be_port(s, xc);
        if (!be_port) {
            ret = -ENOSPC;
            goto out;
        }

        gp->type = EVTCHNSTAT_interdomain;
        gp->u.interdomain.to_qemu = 1;
        gp->u.interdomain.port = be_port;
        xc->guest_port = guest_port;
        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            assign_kernel_eventfd(gp->type, guest_port, xc->fd);
        }
        ret = be_port;
        break;

    default:
        ret = -EINVAL;
        break;
    }

 out:
    qemu_mutex_unlock(&s->port_lock);

    return ret;
}

int xen_be_evtchn_unbind(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }
    if (!xc) {
        return -EFAULT;
    }

    qemu_mutex_lock(&s->port_lock);

    if (port && port != xc->be_port) {
        ret = -EINVAL;
        goto out;
    }

    if (xc->guest_port) {
        XenEvtchnPort *gp = &s->port_table[xc->guest_port];

        /* This should never *not* be true */
        if (gp->type == EVTCHNSTAT_interdomain) {
            gp->type = EVTCHNSTAT_unbound;
            gp->u.interdomain.port = 0;
        }

        if (kvm_xen_has_cap(EVTCHN_SEND)) {
            deassign_kernel_port(xc->guest_port);
        }
        xc->guest_port = 0;
    }

    s->be_handles[xc->be_port] = NULL;
    xc->be_port = 0;
    ret = 0;

 out:
    qemu_mutex_unlock(&s->port_lock);
    return ret;
}

int xen_be_evtchn_close(struct xenevtchn_handle *xc)
{
    if (!xc) {
        return -EFAULT;
    }

    xen_be_evtchn_unbind(xc, 0);

    close(xc->fd);
    free(xc);
    return 0;
}

int xen_be_evtchn_fd(struct xenevtchn_handle *xc)
{
    if (!xc) {
        return -1;
    }
    return xc->fd;
}

int xen_be_evtchn_notify(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    int ret;

    if (!s) {
        return -ENOTSUP;
    }
    if (!xc) {
        return -EFAULT;
    }

    qemu_mutex_lock(&s->port_lock);

    if (xc->guest_port) {
        set_port_pending(s, xc->guest_port);
        ret = 0;
    } else {
        ret = -ENOTCONN;
    }

    qemu_mutex_unlock(&s->port_lock);

    return ret;
}

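/*
 * Read the handle's eventfd and return the backend port number if it had
 * been signalled, 0 if not (or if the handle is unbound), or -errno if the
 * read fails.
 */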
int xen_be_evtchn_pending(struct xenevtchn_handle *xc)
{
    uint64_t val;

    if (!xc) {
        return -EFAULT;
    }
    if (!xc->be_port) {
        return 0;
    }

    if (eventfd_read(xc->fd, &val)) {
        return -errno;
    }

    return val ? xc->be_port : 0;
}

int xen_be_evtchn_unmask(struct xenevtchn_handle *xc, evtchn_port_t port)
{
    if (!xc) {
        return -EFAULT;
    }

    if (xc->be_port != port) {
        return -EINVAL;
    }

    /*
     * We don't actually do anything to unmask it; the event was already
     * consumed in xen_be_evtchn_pending().
     */
    return 0;
}

int xen_be_evtchn_get_guest_port(struct xenevtchn_handle *xc)
{
    return xc->guest_port;
}

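/*
 * QMP handler: list every open event channel port along with its type,
 * target, bound vCPU, and the pending/masked bits read from the guest's
 * shared info page.
 */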
EvtchnInfoList *qmp_xen_event_list(Error **errp)
{
    XenEvtchnState *s = xen_evtchn_singleton;
    EvtchnInfoList *head = NULL, **tail = &head;
    void *shinfo, *pending, *mask;
    int i;

    if (!s) {
        error_setg(errp, "Xen event channel emulation not enabled");
        return NULL;
    }

    shinfo = xen_overlay_get_shinfo_ptr();
    if (!shinfo) {
        error_setg(errp, "Xen shared info page not allocated");
        return NULL;
    }

    if (xen_is_long_mode()) {
        pending = shinfo + offsetof(struct shared_info, evtchn_pending);
        mask = shinfo + offsetof(struct shared_info, evtchn_mask);
    } else {
        pending = shinfo + offsetof(struct compat_shared_info, evtchn_pending);
        mask = shinfo + offsetof(struct compat_shared_info, evtchn_mask);
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    for (i = 0; i < s->nr_ports; i++) {
        XenEvtchnPort *p = &s->port_table[i];
        EvtchnInfo *info;

        if (p->type == EVTCHNSTAT_closed) {
            continue;
        }

        info = g_new0(EvtchnInfo, 1);

        info->port = i;
        qemu_build_assert(EVTCHN_PORT_TYPE_CLOSED == EVTCHNSTAT_closed);
        qemu_build_assert(EVTCHN_PORT_TYPE_UNBOUND == EVTCHNSTAT_unbound);
        qemu_build_assert(EVTCHN_PORT_TYPE_INTERDOMAIN == EVTCHNSTAT_interdomain);
        qemu_build_assert(EVTCHN_PORT_TYPE_PIRQ == EVTCHNSTAT_pirq);
        qemu_build_assert(EVTCHN_PORT_TYPE_VIRQ == EVTCHNSTAT_virq);
        qemu_build_assert(EVTCHN_PORT_TYPE_IPI == EVTCHNSTAT_ipi);

        info->type = p->type;
        if (p->type == EVTCHNSTAT_interdomain) {
            info->remote_domain = g_strdup(p->u.interdomain.to_qemu ?
                                           "qemu" : "loopback");
            info->target = p->u.interdomain.port;
        } else {
            info->target = p->u.val; /* pirq# or virq# */
        }
        info->vcpu = p->vcpu;
        info->pending = test_bit(i, pending);
        info->masked = test_bit(i, mask);

        QAPI_LIST_APPEND(tail, info);
    }

    return head;
}

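/*
 * QMP handler: mark the given event channel port pending, as if the guest
 * or a backend had signalled it.
 */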
void qmp_xen_event_inject(uint32_t port, Error **errp)
{
    XenEvtchnState *s = xen_evtchn_singleton;

    if (!s) {
        error_setg(errp, "Xen event channel emulation not enabled");
        return;
    }

    if (!valid_port(port)) {
        error_setg(errp, "Invalid port %u", port);
        return;
    }

    QEMU_LOCK_GUARD(&s->port_lock);

    if (set_port_pending(s, port)) {
        error_setg(errp, "Failed to set port %u", port);
        return;
    }
}

void hmp_xen_event_list(Monitor *mon, const QDict *qdict)
{
    EvtchnInfoList *iter, *info_list;
    Error *err = NULL;

    info_list = qmp_xen_event_list(&err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    for (iter = info_list; iter; iter = iter->next) {
        EvtchnInfo *info = iter->value;

        monitor_printf(mon, "port %4u: vcpu: %d %s", info->port, info->vcpu,
                       EvtchnPortType_str(info->type));
        if (info->type != EVTCHN_PORT_TYPE_IPI) {
            monitor_printf(mon, "(");
            if (info->remote_domain) {
                monitor_printf(mon, "%s:", info->remote_domain);
            }
            monitor_printf(mon, "%d)", info->target);
        }
        if (info->pending) {
            monitor_printf(mon, " PENDING");
        }
        if (info->masked) {
            monitor_printf(mon, " MASKED");
        }
        monitor_printf(mon, "\n");
    }

    qapi_free_EvtchnInfoList(info_list);
}

void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
{
    int port = qdict_get_int(qdict, "port");
    Error *err = NULL;

    qmp_xen_event_inject(port, &err);
    if (err) {
        hmp_handle_error(mon, err);
    } else {
        monitor_printf(mon, "Delivered port %d\n", port);
    }
}