main-loop.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. /*
  2. * QEMU System Emulator
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. #include "qemu/osdep.h"
  25. #include "qapi/error.h"
  26. #include "qemu/cutils.h"
  27. #include "qemu/timer.h"
  28. #include "sysemu/cpu-timers.h"
  29. #include "sysemu/replay.h"
  30. #include "qemu/main-loop.h"
  31. #include "block/aio.h"
  32. #include "block/thread-pool.h"
  33. #include "qemu/error-report.h"
  34. #include "qemu/queue.h"
  35. #include "qom/object.h"
  36. #ifndef _WIN32
  37. #include <sys/wait.h>
  38. #endif
  39. #ifndef _WIN32
  40. /* If we have signalfd, we mask out the signals we want to handle and then
  41. * use signalfd to listen for them. We rely on whatever the current signal
  42. * handler is to dispatch the signals when we receive them.
  43. */
  44. /*
  45. * Disable CFI checks.
  46. * We are going to call a signal handler directly. Such handler may or may not
  47. * have been defined in our binary, so there's no guarantee that the pointer
  48. * used to set the handler is a cfi-valid pointer. Since the handlers are
  49. * stored in kernel memory, changing the handler to an attacker-defined
  50. * function requires being able to call a sigaction() syscall,
  51. * which is not as easy as overwriting a pointer in memory.
  52. */
QEMU_DISABLE_CFI
/*
 * Drain all pending siginfo records from the signalfd and dispatch each
 * one to whatever handler is currently installed for that signal.
 * @opaque carries the signalfd file descriptor cast to a pointer.
 */
static void sigfd_handler(void *opaque)
{
    int fd = (intptr_t)opaque;
    struct qemu_signalfd_siginfo info;
    struct sigaction action;
    ssize_t len;

    while (1) {
        /* The fd is non-blocking; EAGAIN means the queue is drained. */
        len = RETRY_ON_EINTR(read(fd, &info, sizeof(info)));

        if (len == -1 && errno == EAGAIN) {
            break;
        }

        if (len != sizeof(info)) {
            /* Short read of a siginfo record should never happen. */
            error_report("read from sigfd returned %zd: %s", len,
                         g_strerror(errno));
            return;
        }

        /* Query (not modify) the current disposition for this signal. */
        sigaction(info.ssi_signo, NULL, &action);
        if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
            sigaction_invoke(&action, &info);
        } else if (action.sa_handler) {
            action.sa_handler(info.ssi_signo);
        }
    }
}
/*
 * Block the signals QEMU wants to route through a signalfd, create that
 * signalfd and register sigfd_handler() on it.
 * Returns 0 on success, a negative errno value on failure (*errp set).
 */
static int qemu_signal_init(Error **errp)
{
    int sigfd;
    sigset_t set;

    /*
     * SIG_IPI must be blocked in the main thread and must not be caught
     * by sigwait() in the signal thread. Otherwise, the cpu thread will
     * not catch it reliably.
     */
    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    sigaddset(&set, SIGIO);
    sigaddset(&set, SIGALRM);
    sigaddset(&set, SIGBUS);
    /* SIGINT cannot be handled via signalfd, so that ^C can be used
     * to interrupt QEMU when it is being run under gdb. SIGHUP and
     * SIGTERM are also handled asynchronously, even though it is not
     * strictly necessary, because they use the same handler as SIGINT.
     */
    pthread_sigmask(SIG_BLOCK, &set, NULL);

    /* SIG_IPI stays blocked but is not delivered via the signalfd. */
    sigdelset(&set, SIG_IPI);
    sigfd = qemu_signalfd(&set);
    if (sigfd == -1) {
        error_setg_errno(errp, errno, "failed to create signalfd");
        return -errno;
    }

    g_unix_set_fd_nonblocking(sigfd, true, NULL);

    qemu_set_fd_handler(sigfd, sigfd_handler, NULL, (void *)(intptr_t)sigfd);

    return 0;
}
  108. #else /* _WIN32 */
/* No signalfd (or POSIX signal routing) on Win32; nothing to set up. */
static int qemu_signal_init(Error **errp)
{
    return 0;
}
  113. #endif
  114. static AioContext *qemu_aio_context;
  115. static QEMUBH *qemu_notify_bh;
/* Dummy bottom-half callback: merely scheduling it wakes the main loop. */
static void notify_event_cb(void *opaque)
{
    /* No need to do anything; this bottom half is only used to
     * kick the kernel out of ppoll/poll/WaitForMultipleObjects.
     */
}
/* Return the main-loop AioContext (NULL before qemu_init_main_loop()). */
AioContext *qemu_get_aio_context(void)
{
    return qemu_aio_context;
}
/*
 * Force the main loop out of its blocking wait by scheduling the no-op
 * bottom half.  Safe to call before initialisation; it is then a no-op.
 */
void qemu_notify_event(void)
{
    if (!qemu_aio_context) {
        return;
    }
    qemu_bh_schedule(qemu_notify_bh);
}
  133. static GArray *gpollfds;
/*
 * One-time initialisation of the process-wide main loop: clocks, signal
 * routing, the main AioContext plus its notify bottom half, and the glib
 * sources attached to the default GMainContext.
 * Returns 0 on success, a negative value on failure (*errp set).
 */
int qemu_init_main_loop(Error **errp)
{
    int ret;
    GSource *src;

    init_clocks(qemu_timer_notify_cb);

    ret = qemu_signal_init(errp);
    if (ret) {
        return ret;
    }

    qemu_aio_context = aio_context_new(errp);
    if (!qemu_aio_context) {
        return -EMFILE;
    }
    qemu_set_current_aio_context(qemu_aio_context);
    qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL);
    gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
    /* Attach the aio-context and io-handler sources to the default
     * GMainContext so their events are seen by the glib main loop too.
     */
    src = aio_get_g_source(qemu_aio_context);
    g_source_set_name(src, "aio-context");
    g_source_attach(src, NULL);
    g_source_unref(src);
    src = iohandler_get_g_source();
    g_source_set_name(src, "io-handler");
    g_source_attach(src, NULL);
    g_source_unref(src);
    return 0;
}
/*
 * EventLoopBase update_params hook: propagate aio-max-batch and
 * thread-pool limits from @base to the main AioContext.  Fails with
 * *errp set if the main loop has not been initialised yet.
 */
static void main_loop_update_params(EventLoopBase *base, Error **errp)
{
    ERRP_GUARD();

    if (!qemu_aio_context) {
        error_setg(errp, "qemu aio context not ready");
        return;
    }

    aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch);

    aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min,
                                       base->thread_pool_max, errp);
}
  171. MainLoop *mloop;
  172. static void main_loop_init(EventLoopBase *base, Error **errp)
  173. {
  174. MainLoop *m = MAIN_LOOP(base);
  175. if (mloop) {
  176. error_setg(errp, "only one main-loop instance allowed");
  177. return;
  178. }
  179. main_loop_update_params(base, errp);
  180. mloop = m;
  181. return;
  182. }
/* The main loop exists for the lifetime of the process; never deletable. */
static bool main_loop_can_be_deleted(EventLoopBase *base)
{
    return false;
}
/* Wire the EventLoopBase class hooks up to the main-loop implementations. */
static void main_loop_class_init(ObjectClass *oc, void *class_data)
{
    EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc);

    bc->init = main_loop_init;
    bc->update_params = main_loop_update_params;
    bc->can_be_deleted = main_loop_can_be_deleted;
}
/* QOM type registration boilerplate for TYPE_MAIN_LOOP. */
static const TypeInfo main_loop_info = {
    .name = TYPE_MAIN_LOOP,
    .parent = TYPE_EVENT_LOOP_BASE,
    .class_init = main_loop_class_init,
    .instance_size = sizeof(MainLoop),
};

static void main_loop_register_types(void)
{
    type_register_static(&main_loop_info);
}

type_init(main_loop_register_types)
  205. static int max_priority;
  206. #ifndef _WIN32
  207. static int glib_pollfds_idx;
  208. static int glib_n_poll_fds;
/*
 * Prepare the default GMainContext for polling: append its GPollFDs to
 * the global gpollfds array (recording where they start in
 * glib_pollfds_idx) and fold glib's desired timeout (ms) into
 * *cur_timeout (ns), keeping whichever expires sooner.
 */
static void glib_pollfds_fill(int64_t *cur_timeout)
{
    GMainContext *context = g_main_context_default();
    int timeout = 0;
    int64_t timeout_ns;
    int n;

    g_main_context_prepare(context, &max_priority);

    glib_pollfds_idx = gpollfds->len;
    n = glib_n_poll_fds;
    do {
        GPollFD *pfds;
        glib_n_poll_fds = n;
        g_array_set_size(gpollfds, glib_pollfds_idx + glib_n_poll_fds);
        pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
        /* query may need more slots than last iteration gave it; retry
         * with the returned count until the array is large enough. */
        n = g_main_context_query(context, max_priority, &timeout, pfds,
                                 glib_n_poll_fds);
    } while (n != glib_n_poll_fds);

    if (timeout < 0) {
        /* negative glib timeout means "block indefinitely" */
        timeout_ns = -1;
    } else {
        timeout_ns = (int64_t)timeout * (int64_t)SCALE_MS;
    }

    *cur_timeout = qemu_soonest_timeout(timeout_ns, *cur_timeout);
}
/*
 * Hand poll results for glib's slice of gpollfds (starting at
 * glib_pollfds_idx) back to the default GMainContext and dispatch any
 * sources that became ready.
 */
static void glib_pollfds_poll(void)
{
    GMainContext *context = g_main_context_default();
    GPollFD *pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);

    if (g_main_context_check(context, max_priority, pfds, glib_n_poll_fds)) {
        g_main_context_dispatch(context);
    }
}
  241. #define MAX_MAIN_LOOP_SPIN (1000)
/*
 * POSIX main-loop wait: poll all main-loop fds (ours plus glib's) for at
 * most @timeout ns, dropping the BQL and the replay mutex for the
 * duration of the wait, then dispatch ready glib sources.
 * Returns the qemu_poll_ns() result.
 */
static int os_host_main_loop_wait(int64_t timeout)
{
    GMainContext *context = g_main_context_default();
    int ret;

    g_main_context_acquire(context);

    glib_pollfds_fill(&timeout);

    /* Drop locks so other threads can make progress while we block. */
    bql_unlock();
    replay_mutex_unlock();

    ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);

    /* NOTE(review): replay lock is reacquired before the BQL — presumably
     * the required ordering; confirm against replay-mutex documentation. */
    replay_mutex_lock();
    bql_lock();

    glib_pollfds_poll();

    g_main_context_release(context);

    return ret;
}
  257. #else
  258. /***********************************************************/
  259. /* Polling handling */
  260. typedef struct PollingEntry {
  261. PollingFunc *func;
  262. void *opaque;
  263. struct PollingEntry *next;
  264. } PollingEntry;
  265. static PollingEntry *first_polling_entry;
  266. int qemu_add_polling_cb(PollingFunc *func, void *opaque)
  267. {
  268. PollingEntry **ppe, *pe;
  269. pe = g_new0(PollingEntry, 1);
  270. pe->func = func;
  271. pe->opaque = opaque;
  272. for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
  273. *ppe = pe;
  274. return 0;
  275. }
  276. void qemu_del_polling_cb(PollingFunc *func, void *opaque)
  277. {
  278. PollingEntry **ppe, *pe;
  279. for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) {
  280. pe = *ppe;
  281. if (pe->func == func && pe->opaque == opaque) {
  282. *ppe = pe->next;
  283. g_free(pe);
  284. break;
  285. }
  286. }
  287. }
  288. /***********************************************************/
  289. /* Wait objects support */
  290. typedef struct WaitObjects {
  291. int num;
  292. int revents[MAXIMUM_WAIT_OBJECTS];
  293. HANDLE events[MAXIMUM_WAIT_OBJECTS];
  294. WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS];
  295. void *opaque[MAXIMUM_WAIT_OBJECTS];
  296. } WaitObjects;
  297. static WaitObjects wait_objects = {0};
  298. int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
  299. {
  300. int i;
  301. WaitObjects *w = &wait_objects;
  302. if (w->num >= MAXIMUM_WAIT_OBJECTS) {
  303. return -1;
  304. }
  305. for (i = 0; i < w->num; i++) {
  306. /* check if the same handle is added twice */
  307. if (w->events[i] == handle) {
  308. return -1;
  309. }
  310. }
  311. w->events[w->num] = handle;
  312. w->func[w->num] = func;
  313. w->opaque[w->num] = opaque;
  314. w->revents[w->num] = 0;
  315. w->num++;
  316. return 0;
  317. }
/*
 * Remove @handle from the wait-object table, compacting the parallel
 * arrays so remaining entries stay contiguous.  @func and @opaque are
 * not used for matching; the first entry with this handle is removed.
 */
void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
{
    int i, found;
    WaitObjects *w = &wait_objects;

    found = 0;
    for (i = 0; i < w->num; i++) {
        if (w->events[i] == handle) {
            found = 1;
        }
        /* once found, shift every later entry down by one slot */
        if (found && i < (MAXIMUM_WAIT_OBJECTS - 1)) {
            w->events[i] = w->events[i + 1];
            w->func[i] = w->func[i + 1];
            w->opaque[i] = w->opaque[i + 1];
            w->revents[i] = w->revents[i + 1];
        }
    }
    if (found) {
        w->num--;
    }
}
  338. static int pollfds_fill(GArray *pollfds, fd_set *rfds, fd_set *wfds,
  339. fd_set *xfds)
  340. {
  341. int nfds = -1;
  342. int i;
  343. for (i = 0; i < pollfds->len; i++) {
  344. GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
  345. int fd = pfd->fd;
  346. int events = pfd->events;
  347. if (events & G_IO_IN) {
  348. FD_SET(fd, rfds);
  349. nfds = MAX(nfds, fd);
  350. }
  351. if (events & G_IO_OUT) {
  352. FD_SET(fd, wfds);
  353. nfds = MAX(nfds, fd);
  354. }
  355. if (events & G_IO_PRI) {
  356. FD_SET(fd, xfds);
  357. nfds = MAX(nfds, fd);
  358. }
  359. }
  360. return nfds;
  361. }
  362. static void pollfds_poll(GArray *pollfds, int nfds, fd_set *rfds,
  363. fd_set *wfds, fd_set *xfds)
  364. {
  365. int i;
  366. for (i = 0; i < pollfds->len; i++) {
  367. GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
  368. int fd = pfd->fd;
  369. int revents = 0;
  370. if (FD_ISSET(fd, rfds)) {
  371. revents |= G_IO_IN;
  372. }
  373. if (FD_ISSET(fd, wfds)) {
  374. revents |= G_IO_OUT;
  375. }
  376. if (FD_ISSET(fd, xfds)) {
  377. revents |= G_IO_PRI;
  378. }
  379. pfd->revents = revents & pfd->events;
  380. }
  381. }
/*
 * Win32 main-loop wait: run legacy polling callbacks, emulate poll() on
 * sockets with a zero-timeout select(), then block in qemu_poll_ns() on
 * glib's fds plus the registered wait-object HANDLEs (with the BQL and
 * replay mutex dropped while blocked), and finally invoke wait-object
 * callbacks and dispatch ready glib sources.
 */
static int os_host_main_loop_wait(int64_t timeout)
{
    GMainContext *context = g_main_context_default();
    GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
    int select_ret = 0;
    int g_poll_ret, ret, i, n_poll_fds;
    PollingEntry *pe;
    WaitObjects *w = &wait_objects;
    gint poll_timeout;
    int64_t poll_timeout_ns;
    static struct timeval tv0;   /* static: stays all-zero (poll-only select) */
    fd_set rfds, wfds, xfds;
    int nfds;

    g_main_context_acquire(context);

    /* XXX: need to suppress polling by better using win32 events */
    ret = 0;
    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
        ret |= pe->func(pe->opaque);
    }
    if (ret != 0) {
        /* a polling callback found work; skip the blocking wait entirely */
        g_main_context_release(context);
        return ret;
    }

    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&xfds);
    nfds = pollfds_fill(gpollfds, &rfds, &wfds, &xfds);
    if (nfds >= 0) {
        select_ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
        if (select_ret != 0) {
            /* socket activity (or error): don't block in qemu_poll_ns */
            timeout = 0;
        }
        if (select_ret > 0) {
            pollfds_poll(gpollfds, nfds, &rfds, &wfds, &xfds);
        }
    }

    g_main_context_prepare(context, &max_priority);
    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
                                      poll_fds, ARRAY_SIZE(poll_fds));
    g_assert(n_poll_fds + w->num <= ARRAY_SIZE(poll_fds));

    /* append the registered wait-object HANDLEs after glib's fds */
    for (i = 0; i < w->num; i++) {
        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
        poll_fds[n_poll_fds + i].events = G_IO_IN;
    }

    if (poll_timeout < 0) {
        poll_timeout_ns = -1;
    } else {
        poll_timeout_ns = (int64_t)poll_timeout * (int64_t)SCALE_MS;
    }

    poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);

    bql_unlock();

    replay_mutex_unlock();

    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);

    replay_mutex_lock();
    bql_lock();
    if (g_poll_ret > 0) {
        /* copy all results out before invoking any callback */
        for (i = 0; i < w->num; i++) {
            w->revents[i] = poll_fds[n_poll_fds + i].revents;
        }
        for (i = 0; i < w->num; i++) {
            if (w->revents[i] && w->func[i]) {
                w->func[i](w->opaque[i]);
            }
        }
    }

    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
        g_main_context_dispatch(context);
    }

    g_main_context_release(context);

    return select_ret || g_poll_ret;
}
  453. #endif
  454. static NotifierList main_loop_poll_notifiers =
  455. NOTIFIER_LIST_INITIALIZER(main_loop_poll_notifiers);
/* Register @notify to be called around each main_loop_wait() iteration. */
void main_loop_poll_add_notifier(Notifier *notify)
{
    notifier_list_add(&main_loop_poll_notifiers, notify);
}
/* Unregister a notifier added with main_loop_poll_add_notifier(). */
void main_loop_poll_remove_notifier(Notifier *notify)
{
    notifier_remove(notify);
}
/*
 * Run one iteration of the main loop: let poll notifiers fill gpollfds
 * (MAIN_LOOP_POLL_FILL), wait until the sooner of the notifier timeout
 * and the next timer deadline (or not at all if @nonblocking), notify
 * again with the poll outcome (OK/ERR), then run expired timers.
 */
void main_loop_wait(int nonblocking)
{
    MainLoopPoll mlpoll = {
        .state = MAIN_LOOP_POLL_FILL,
        .timeout = UINT32_MAX,
        .pollfds = gpollfds,
    };
    int ret;
    int64_t timeout_ns;

    if (nonblocking) {
        mlpoll.timeout = 0;
    }

    /* poll any events */
    g_array_set_size(gpollfds, 0); /* reset for new iteration */
    /* XXX: separate device handlers from system ones */
    notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);

    if (mlpoll.timeout == UINT32_MAX) {
        /* no notifier requested a timeout: wait indefinitely */
        timeout_ns = -1;
    } else {
        timeout_ns = (uint64_t)mlpoll.timeout * (int64_t)(SCALE_MS);
    }

    timeout_ns = qemu_soonest_timeout(timeout_ns,
                                      timerlistgroup_deadline_ns(
                                          &main_loop_tlg));

    ret = os_host_main_loop_wait(timeout_ns);
    mlpoll.state = ret < 0 ? MAIN_LOOP_POLL_ERR : MAIN_LOOP_POLL_OK;
    notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);

    if (icount_enabled()) {
        /*
         * CPU thread can infinitely wait for event after
         * missing the warp
         */
        icount_start_warp_timer();
    }
    qemu_clock_run_all_timers();
}
  500. /* Functions to operate on the main QEMU AioContext. */
/* Create a bottom half attached to the main-loop AioContext. */
QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name,
                         MemReentrancyGuard *reentrancy_guard)
{
    return aio_bh_new_full(qemu_aio_context, cb, opaque, name,
                           reentrancy_guard);
}
  507. /*
  508. * Functions to operate on the I/O handler AioContext.
  509. * This context runs on top of main loop. We can't reuse qemu_aio_context
  510. * because iohandlers mustn't be polled by aio_poll(qemu_aio_context).
  511. */
  512. static AioContext *iohandler_ctx;
/* Lazily create the iohandler AioContext on first use (aborts on OOM). */
static void iohandler_init(void)
{
    if (!iohandler_ctx) {
        iohandler_ctx = aio_context_new(&error_abort);
    }
}
/* Return the iohandler AioContext, creating it if necessary. */
AioContext *iohandler_get_aio_context(void)
{
    iohandler_init();
    return iohandler_ctx;
}
/* Return a GSource for the iohandler AioContext, creating the context
 * if necessary. */
GSource *iohandler_get_g_source(void)
{
    iohandler_init();
    return aio_get_g_source(iohandler_ctx);
}
/*
 * (Un)register read/write handlers for @fd on the iohandler context.
 * @opaque is passed to both handlers.
 */
void qemu_set_fd_handler(int fd,
                         IOHandler *fd_read,
                         IOHandler *fd_write,
                         void *opaque)
{
    iohandler_init();
    aio_set_fd_handler(iohandler_ctx, fd, fd_read, fd_write, NULL, NULL,
                       opaque);
}
/* Attach @handler to EventNotifier @e on the iohandler context. */
void event_notifier_set_handler(EventNotifier *e,
                                EventNotifierHandler *handler)
{
    iohandler_init();
    aio_set_event_notifier(iohandler_ctx, e, handler, NULL, NULL);
}