hyperv.c

/*
 * Hyper-V guest/hypervisor interaction
 *
 * Copyright (c) 2015-2018 Virtuozzo International GmbH.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qapi/error.h"
#include "exec/address-spaces.h"
#include "sysemu/kvm.h"
#include "qemu/bitops.h"
#include "qemu/queue.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "hw/hyperv/hyperv.h"

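/*
 * Per-vCPU SynIC (synthetic interrupt controller) state: whether the guest
 * has enabled it, and where it has mapped the message and event flag pages.
 */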
typedef struct SynICState {
    DeviceState parent_obj;

    CPUState *cs;

    bool enabled;
    hwaddr msg_page_addr;
    hwaddr event_page_addr;
    MemoryRegion msg_page_mr;
    MemoryRegion event_page_mr;
    struct hyperv_message_page *msg_page;
    struct hyperv_event_flags_page *event_page;
} SynICState;

#define TYPE_SYNIC "hyperv-synic"
#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)

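/* Return the SynIC attached to the given vCPU, or NULL if it has none. */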
static SynICState *get_synic(CPUState *cs)
{
    return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
}

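/*
 * Map or unmap the guest-visible message and event flag pages as the guest
 * enables, disables, or relocates them; an address of 0 means "not mapped".
 */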
static void synic_update(SynICState *synic, bool enable,
                         hwaddr msg_page_addr, hwaddr event_page_addr)
{
    synic->enabled = enable;
    if (synic->msg_page_addr != msg_page_addr) {
        if (synic->msg_page_addr) {
            memory_region_del_subregion(get_system_memory(),
                                        &synic->msg_page_mr);
        }
        if (msg_page_addr) {
            memory_region_add_subregion(get_system_memory(), msg_page_addr,
                                        &synic->msg_page_mr);
        }
        synic->msg_page_addr = msg_page_addr;
    }
    if (synic->event_page_addr != event_page_addr) {
        if (synic->event_page_addr) {
            memory_region_del_subregion(get_system_memory(),
                                        &synic->event_page_mr);
        }
        if (event_page_addr) {
            memory_region_add_subregion(get_system_memory(), event_page_addr,
                                        &synic->event_page_mr);
        }
        synic->event_page_addr = event_page_addr;
    }
}

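/* Public wrapper for synic_update that tolerates vCPUs without a SynIC. */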
void hyperv_synic_update(CPUState *cs, bool enable,
                         hwaddr msg_page_addr, hwaddr event_page_addr)
{
    SynICState *synic = get_synic(cs);

    if (!synic) {
        return;
    }

    synic_update(synic, enable, msg_page_addr, event_page_addr);
}

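/* Allocate the RAM backing the message and event flag pages. */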
static void synic_realize(DeviceState *dev, Error **errp)
{
    Object *obj = OBJECT(dev);
    SynICState *synic = SYNIC(dev);
    char *msgp_name, *eventp_name;
    uint32_t vp_index;

    /* memory region names have to be globally unique */
    vp_index = hyperv_vp_index(synic->cs);
    msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
    eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);

    memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
                           sizeof(*synic->msg_page), &error_abort);
    memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
                           sizeof(*synic->event_page), &error_abort);
    synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
    synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);

    g_free(msgp_name);
    g_free(eventp_name);
}

static void synic_reset(DeviceState *dev)
{
    SynICState *synic = SYNIC(dev);

    memset(synic->msg_page, 0, sizeof(*synic->msg_page));
    memset(synic->event_page, 0, sizeof(*synic->event_page));
    synic_update(synic, false, 0, 0);
}

static void synic_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = synic_realize;
    dc->reset = synic_reset;
    dc->user_creatable = false;
}

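/* Create a SynIC for the vCPU and attach it as a "synic" child object. */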
void hyperv_synic_add(CPUState *cs)
{
    Object *obj;
    SynICState *synic;

    obj = object_new(TYPE_SYNIC);
    synic = SYNIC(obj);
    synic->cs = cs;
    object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
    object_unref(obj);
    object_property_set_bool(obj, true, "realized", &error_abort);
}

void hyperv_synic_reset(CPUState *cs)
{
    device_reset(DEVICE(get_synic(cs)));
}

static const TypeInfo synic_type_info = {
    .name = TYPE_SYNIC,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(SynICState),
    .class_init = synic_class_init,
};

static void synic_register_types(void)
{
    type_register_static(&synic_type_info);
}

type_init(synic_register_types)

/*
 * KVM has its own message producers (SynIC timers). To guarantee
 * serialization with both the KVM vcpu and the guest cpu, the messages are
 * first staged in an intermediate area and then posted to the SynIC message
 * page in the vcpu thread.
 */
typedef struct HvSintStagedMessage {
    /* message content staged by hyperv_post_msg */
    struct hyperv_message msg;
    /* callback + data (r/o) to complete the processing in a BH */
    HvSintMsgCb cb;
    void *cb_data;
    /* message posting status filled by cpu_post_msg */
    int status;
    /* passing the buck: */
    enum {
        /* initial state */
        HV_STAGED_MSG_FREE,
        /*
         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
         */
        HV_STAGED_MSG_BUSY,
        /*
         * cpu_post_msg (vcpu thread) tries to copy the staged msg into the
         * msg slot and notify the guest, records the status, marks the
         * posting done (BUSY -> POSTED), and schedules the sint_msg_bh BH
         */
        HV_STAGED_MSG_POSTED,
        /*
         * sint_msg_bh (BH) verifies that the posting is done, runs the
         * callback, and starts over (POSTED -> FREE)
         */
    } state;
} HvSintStagedMessage;

struct HvSintRoute {
    uint32_t sint;
    SynICState *synic;
    int gsi;
    EventNotifier sint_set_notifier;
    EventNotifier sint_ack_notifier;
    HvSintStagedMessage *staged_msg;

    unsigned refcount;
};

static CPUState *hyperv_find_vcpu(uint32_t vp_index)
{
    CPUState *cs = qemu_get_cpu(vp_index);

    if (!cs) {
        /* callers treat a missing vCPU as a soft failure */
        return NULL;
    }
    assert(hyperv_vp_index(cs) == vp_index);
    return cs;
}

/*
 * BH to complete the processing of a staged message.
 */
static void sint_msg_bh(void *opaque)
{
    HvSintRoute *sint_route = opaque;
    HvSintStagedMessage *staged_msg = sint_route->staged_msg;

    if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
        /* status not ready yet (spurious ack from guest?), ignore */
        return;
    }

    staged_msg->cb(staged_msg->cb_data, staged_msg->status);
    staged_msg->status = 0;

    /* staged message processing finished, ready to start over */
    atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
    /* drop the reference taken in hyperv_post_msg */
    hyperv_sint_route_unref(sint_route);
}

/*
 * Worker to transfer the message from the staging area into the SynIC message
 * page in vcpu context.
 */
static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
{
    HvSintRoute *sint_route = data.host_ptr;
    HvSintStagedMessage *staged_msg = sint_route->staged_msg;
    SynICState *synic = sint_route->synic;
    struct hyperv_message *dst_msg;
    bool wait_for_sint_ack = false;

    assert(staged_msg->state == HV_STAGED_MSG_BUSY);

    if (!synic->enabled || !synic->msg_page_addr) {
        staged_msg->status = -ENXIO;
        goto posted;
    }

    dst_msg = &synic->msg_page->slot[sint_route->sint];

    if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
        dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
        staged_msg->status = -EAGAIN;
        wait_for_sint_ack = true;
    } else {
        memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
        staged_msg->status = hyperv_sint_route_set_sint(sint_route);
    }

    memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));

posted:
    atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
    /*
     * Notify the msg originator of the progress made; if the slot was busy
     * we set the msg_pending flag in it, so it will be the guest who does
     * EOM and triggers the notification from KVM via sint_ack_notifier
     */
    if (!wait_for_sint_ack) {
        aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
                                sint_route);
    }
}

/*
 * Post a Hyper-V message to the staging area, for delivery to the guest in
 * the vcpu thread.
 */
int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
{
    HvSintStagedMessage *staged_msg = sint_route->staged_msg;

    assert(staged_msg);

    /* grab the staging area */
    if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
                       HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
        return -EAGAIN;
    }

    memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));

    /* hold a reference on sint_route until the callback is finished */
    hyperv_sint_route_ref(sint_route);

    /* schedule message posting attempt in vcpu thread */
    async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
                     RUN_ON_CPU_HOST_PTR(sint_route));
    return 0;
}

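/*
 * Illustrative sketch (not part of the original file): how a message
 * producer might drive the staging machinery above.  The vp_index/sint
 * values and both example_* functions are hypothetical; the block is
 * compiled out via #if 0.
 */
#if 0
/* hypothetical completion callback, matching the HvSintMsgCb signature */
static void example_msg_done(void *data, int status)
{
    /* status == -EAGAIN means the msg slot was busy; the producer may
     * repost once the guest acknowledges the previous message */
}

static void example_producer(void)
{
    /* vp_index 0 / sint 2 are hypothetical values */
    HvSintRoute *route = hyperv_sint_route_new(0, 2, example_msg_done, NULL);
    struct hyperv_message msg = { 0 };

    if (route && hyperv_post_msg(route, &msg) == 0) {
        /* completion status arrives asynchronously via example_msg_done */
    }
}
#endif
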
static void sint_ack_handler(EventNotifier *notifier)
{
    HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
                                           sint_ack_notifier);
    event_notifier_test_and_clear(notifier);

    /*
     * the guest consumed the previous message so complete the current one
     * with -EAGAIN and let the msg originator retry
     */
    aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
}

/*
 * Set given event flag for a given sint on a given vcpu, and signal the sint.
 */
int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
{
    int ret;
    SynICState *synic = sint_route->synic;
    unsigned long *flags, set_mask;
    unsigned set_idx;

    /* valid flag numbers are 0 .. HV_EVENT_FLAGS_COUNT - 1 */
    if (eventno >= HV_EVENT_FLAGS_COUNT) {
        return -EINVAL;
    }
    if (!synic->enabled || !synic->event_page_addr) {
        return -ENXIO;
    }

    set_idx = BIT_WORD(eventno);
    set_mask = BIT_MASK(eventno);
    flags = synic->event_page->slot[sint_route->sint].flags;

    if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
        memory_region_set_dirty(&synic->event_page_mr, 0,
                                sizeof(*synic->event_page));
        ret = hyperv_sint_route_set_sint(sint_route);
    } else {
        ret = 0;
    }
    return ret;
}

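/*
 * Allocate a SINT route for the given vCPU and SINT number, wiring an irqfd
 * to a KVM HV_SINT GSI; if a completion callback is given, also set up the
 * staged-message area and the ack notifier that resumes a pending post.
 */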
HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
                                   HvSintMsgCb cb, void *cb_data)
{
    HvSintRoute *sint_route;
    EventNotifier *ack_notifier;
    int r, gsi;
    CPUState *cs;
    SynICState *synic;

    cs = hyperv_find_vcpu(vp_index);
    if (!cs) {
        return NULL;
    }

    synic = get_synic(cs);
    if (!synic) {
        return NULL;
    }

    sint_route = g_new0(HvSintRoute, 1);
    r = event_notifier_init(&sint_route->sint_set_notifier, false);
    if (r) {
        goto err;
    }

    ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
    if (ack_notifier) {
        sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
        sint_route->staged_msg->cb = cb;
        sint_route->staged_msg->cb_data = cb_data;

        r = event_notifier_init(ack_notifier, false);
        if (r) {
            goto err_sint_set_notifier;
        }

        event_notifier_set_handler(ack_notifier, sint_ack_handler);
    }

    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
    if (gsi < 0) {
        goto err_gsi;
    }

    r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
                                           &sint_route->sint_set_notifier,
                                           ack_notifier, gsi);
    if (r) {
        goto err_irqfd;
    }
    sint_route->gsi = gsi;
    sint_route->synic = synic;
    sint_route->sint = sint;
    sint_route->refcount = 1;

    return sint_route;

err_irqfd:
    kvm_irqchip_release_virq(kvm_state, gsi);
err_gsi:
    if (ack_notifier) {
        event_notifier_set_handler(ack_notifier, NULL);
        event_notifier_cleanup(ack_notifier);
        g_free(sint_route->staged_msg);
    }
err_sint_set_notifier:
    event_notifier_cleanup(&sint_route->sint_set_notifier);
err:
    g_free(sint_route);

    return NULL;
}

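/*
 * Plain (non-atomic) reference counting: assumed to be manipulated only in
 * the main loop thread (e.g. hyperv_post_msg and the completion BH), so no
 * locking is needed.
 */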
void hyperv_sint_route_ref(HvSintRoute *sint_route)
{
    sint_route->refcount++;
}

void hyperv_sint_route_unref(HvSintRoute *sint_route)
{
    if (!sint_route) {
        return;
    }

    assert(sint_route->refcount > 0);

    if (--sint_route->refcount) {
        return;
    }

    kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
                                          &sint_route->sint_set_notifier,
                                          sint_route->gsi);
    kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
    if (sint_route->staged_msg) {
        event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
        event_notifier_cleanup(&sint_route->sint_ack_notifier);
        g_free(sint_route->staged_msg);
    }
    event_notifier_cleanup(&sint_route->sint_set_notifier);
    g_free(sint_route);
}

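/* Trigger the SINT: kick the irqfd bound to the route's KVM GSI. */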
int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
{
    return event_notifier_set(&sint_route->sint_set_notifier);
}

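/*
 * Registry of EventNotifiers keyed by connection id, for the "signal event"
 * hypercall: writers serialize on handlers_mutex, readers walk the list
 * locklessly under RCU.
 */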
typedef struct EventFlagHandler {
    struct rcu_head rcu;
    QLIST_ENTRY(EventFlagHandler) link;
    uint32_t conn_id;
    EventNotifier *notifier;
} EventFlagHandler;

static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
static QemuMutex handlers_mutex;

static void __attribute__((constructor)) hv_init(void)
{
    QLIST_INIT(&event_flag_handlers);
    qemu_mutex_init(&handlers_mutex);
}

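/*
 * Register (notifier != NULL) or unregister (notifier == NULL) the handler
 * for the given connection id.  Returns -EEXIST if the id is already taken,
 * -ENOENT if there is nothing to unregister.
 */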
int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
{
    int ret;
    EventFlagHandler *handler;

    qemu_mutex_lock(&handlers_mutex);
    QLIST_FOREACH(handler, &event_flag_handlers, link) {
        if (handler->conn_id == conn_id) {
            if (notifier) {
                ret = -EEXIST;
            } else {
                QLIST_REMOVE_RCU(handler, link);
                g_free_rcu(handler, rcu);
                ret = 0;
            }
            goto unlock;
        }
    }

    if (notifier) {
        handler = g_new(EventFlagHandler, 1);
        handler->conn_id = conn_id;
        handler->notifier = notifier;
        QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
        ret = 0;
    } else {
        ret = -ENOENT;
    }
unlock:
    qemu_mutex_unlock(&handlers_mutex);
    return ret;
}

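/*
 * Handle the "signal event" hypercall: validate the connection id parameter
 * (read from guest memory first on the slow path) and fire the registered
 * notifier, if any.
 */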
uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
{
    uint16_t ret;
    EventFlagHandler *handler;

    if (unlikely(!fast)) {
        hwaddr addr = param;

        if (addr & (__alignof__(addr) - 1)) {
            return HV_STATUS_INVALID_ALIGNMENT;
        }

        param = ldq_phys(&address_space_memory, addr);
    }

    /*
     * Per spec, bits 32-47 contain the extra "flag number".  However, we
     * have no use for it, and in all known usecases it is zero, so just
     * report lookup failure if it isn't.
     */
    if (param & 0xffff00000000ULL) {
        return HV_STATUS_INVALID_PORT_ID;
    }
    /* remaining bits are reserved-zero */
    if (param & ~HV_CONNECTION_ID_MASK) {
        return HV_STATUS_INVALID_HYPERCALL_INPUT;
    }

    ret = HV_STATUS_INVALID_CONNECTION_ID;
    rcu_read_lock();
    QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
        if (handler->conn_id == param) {
            event_notifier_set(handler->notifier);
            ret = 0;
            break;
        }
    }
    rcu_read_unlock();

    return ret;
}