main-loop.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638
  1. /*
  2. * QEMU System Emulator
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. #include "qemu/osdep.h"
  25. #include "qapi/error.h"
  26. #include "qemu/cutils.h"
  27. #include "qemu/timer.h"
  28. #include "sysemu/qtest.h"
  29. #include "sysemu/cpus.h"
  30. #include "sysemu/replay.h"
  31. #include "qemu/main-loop.h"
  32. #include "block/aio.h"
  33. #include "qemu/error-report.h"
  34. #include "qemu/queue.h"
  35. #ifndef _WIN32
  36. #include <sys/wait.h>
  37. #endif
  38. #ifndef _WIN32
/* If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them. We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 */

/*
 * fd-read handler for the signalfd: drain every pending siginfo record
 * from the descriptor and synchronously invoke the signal's currently
 * installed handler for each one.
 *
 * @opaque: the signalfd file descriptor, passed through the void* as an
 *          intptr_t (see qemu_signal_init()).
 */
static void sigfd_handler(void *opaque)
{
    int fd = (intptr_t)opaque;
    struct qemu_signalfd_siginfo info;
    struct sigaction action;
    ssize_t len;

    while (1) {
        /* Retry reads that were interrupted by a signal. */
        do {
            len = read(fd, &info, sizeof(info));
        } while (len == -1 && errno == EINTR);

        /* The fd is non-blocking (see qemu_signal_init()):
         * EAGAIN means the queue is fully drained.
         */
        if (len == -1 && errno == EAGAIN) {
            break;
        }

        /* signalfd delivers whole records; anything else is an error. */
        if (len != sizeof(info)) {
            error_report("read from sigfd returned %zd: %s", len,
                         g_strerror(errno));
            return;
        }

        /* Look up the handler currently installed for this signal and
         * dispatch to it, preserving SA_SIGINFO semantics when set.
         */
        sigaction(info.ssi_signo, NULL, &action);
        if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
            sigaction_invoke(&action, &info);
        } else if (action.sa_handler) {
            action.sa_handler(info.ssi_signo);
        }
    }
}
/*
 * Block the signals the main loop wants to consume via signalfd and
 * register sigfd_handler() to dispatch them.
 *
 * Returns 0 on success; on failure sets @errp and returns the negated
 * errno from the failed signalfd creation.
 */
static int qemu_signal_init(Error **errp)
{
    int sigfd;
    sigset_t set;

    /*
     * SIG_IPI must be blocked in the main thread and must not be caught
     * by sigwait() in the signal thread. Otherwise, the cpu thread will
     * not catch it reliably.
     */
    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    sigaddset(&set, SIGIO);
    sigaddset(&set, SIGALRM);
    sigaddset(&set, SIGBUS);
    /* SIGINT cannot be handled via signalfd, so that ^C can be used
     * to interrupt QEMU when it is being run under gdb. SIGHUP and
     * SIGTERM are also handled asynchronously, even though it is not
     * strictly necessary, because they use the same handler as SIGINT.
     */
    pthread_sigmask(SIG_BLOCK, &set, NULL);

    /* SIG_IPI stays blocked but is NOT routed through the signalfd. */
    sigdelset(&set, SIG_IPI);
    sigfd = qemu_signalfd(&set);
    if (sigfd == -1) {
        error_setg_errno(errp, errno, "failed to create signalfd");
        return -errno;
    }

    /* Non-blocking so sigfd_handler() can drain until EAGAIN. */
    fcntl_setfl(sigfd, O_NONBLOCK);

    qemu_set_fd_handler(sigfd, sigfd_handler, NULL, (void *)(intptr_t)sigfd);

    return 0;
}
  99. #else /* _WIN32 */
/* Win32 stub: there is no POSIX signal routing to set up. */
static int qemu_signal_init(Error **errp)
{
    return 0;
}
  104. #endif
  105. static AioContext *qemu_aio_context;
  106. static QEMUBH *qemu_notify_bh;
/* Deliberately empty bottom-half callback used by qemu_notify_event(). */
static void notify_event_cb(void *opaque)
{
    /* No need to do anything; this bottom half is only used to
     * kick the kernel out of ppoll/poll/WaitForMultipleObjects.
     */
}
/* Return the main-loop AioContext (NULL before qemu_init_main_loop()). */
AioContext *qemu_get_aio_context(void)
{
    return qemu_aio_context;
}
  117. void qemu_notify_event(void)
  118. {
  119. if (!qemu_aio_context) {
  120. return;
  121. }
  122. qemu_bh_schedule(qemu_notify_bh);
  123. }
  124. static GArray *gpollfds;
/*
 * One-time initialization of the main loop: clocks, signal handling,
 * the global AioContext, the wakeup bottom half, the pollfd array, and
 * attachment of the aio and iohandler GSources to the default glib
 * main context.
 *
 * Returns 0 on success, or a negative errno value with @errp set.
 */
int qemu_init_main_loop(Error **errp)
{
    int ret;
    GSource *src;
    Error *local_error = NULL;

    init_clocks(qemu_timer_notify_cb);

    ret = qemu_signal_init(errp);
    if (ret) {
        return ret;
    }

    qemu_aio_context = aio_context_new(&local_error);
    if (!qemu_aio_context) {
        error_propagate(errp, local_error);
        /* NOTE(review): -EMFILE presumes fd exhaustion as the failure
         * mode; the real cause is in local_error.
         */
        return -EMFILE;
    }
    qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL);
    gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));

    /* Plug the main AioContext into the glib default main context. */
    src = aio_get_g_source(qemu_aio_context);
    g_source_set_name(src, "aio-context");
    g_source_attach(src, NULL);
    g_source_unref(src);

    /* And the separate iohandler context (see comment near its decl). */
    src = iohandler_get_g_source();
    g_source_set_name(src, "io-handler");
    g_source_attach(src, NULL);
    g_source_unref(src);

    return 0;
}
  152. static int max_priority;
  153. #ifndef _WIN32
  154. static int glib_pollfds_idx;
  155. static int glib_n_poll_fds;
/*
 * Append the glib default context's poll fds to the global gpollfds
 * array, and fold glib's desired timeout into *cur_timeout (ns).
 *
 * glib_pollfds_idx and glib_n_poll_fds record where the glib fds live
 * inside gpollfds so glib_pollfds_poll() can find them after the wait.
 */
static void glib_pollfds_fill(int64_t *cur_timeout)
{
    GMainContext *context = g_main_context_default();
    int timeout = 0;
    int64_t timeout_ns;
    int n;

    g_main_context_prepare(context, &max_priority);

    glib_pollfds_idx = gpollfds->len;
    /* Start from last iteration's fd count as a size guess and grow the
     * array until g_main_context_query() reports that everything fits.
     */
    n = glib_n_poll_fds;
    do {
        GPollFD *pfds;
        glib_n_poll_fds = n;
        g_array_set_size(gpollfds, glib_pollfds_idx + glib_n_poll_fds);
        pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
        n = g_main_context_query(context, max_priority, &timeout, pfds,
                                 glib_n_poll_fds);
    } while (n != glib_n_poll_fds);

    /* glib reports milliseconds (-1 == infinite); convert to ns. */
    if (timeout < 0) {
        timeout_ns = -1;
    } else {
        timeout_ns = (int64_t)timeout * (int64_t)SCALE_MS;
    }

    /* The caller sleeps no longer than the soonest of the two. */
    *cur_timeout = qemu_soonest_timeout(timeout_ns, *cur_timeout);
}
  180. static void glib_pollfds_poll(void)
  181. {
  182. GMainContext *context = g_main_context_default();
  183. GPollFD *pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
  184. if (g_main_context_check(context, max_priority, pfds, glib_n_poll_fds)) {
  185. g_main_context_dispatch(context);
  186. }
  187. }
  188. #define MAX_MAIN_LOOP_SPIN (1000)
/*
 * POSIX host wait: poll both the glib default context's fds and QEMU's
 * own fds until an event fires or @timeout (ns, -1 == infinite)
 * expires.
 *
 * The iothread lock and the replay mutex are dropped around the
 * blocking poll so other threads can make progress, and re-acquired in
 * the opposite order before glib dispatch runs.
 *
 * Returns the qemu_poll_ns() result: number of ready fds, 0 on
 * timeout, negative on error.
 */
static int os_host_main_loop_wait(int64_t timeout)
{
    GMainContext *context = g_main_context_default();
    int ret;

    g_main_context_acquire(context);

    /* May shrink @timeout to glib's deadline. */
    glib_pollfds_fill(&timeout);

    qemu_mutex_unlock_iothread();
    replay_mutex_unlock();

    ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);

    replay_mutex_lock();
    qemu_mutex_lock_iothread();

    glib_pollfds_poll();

    g_main_context_release(context);

    return ret;
}
  204. #else
  205. /***********************************************************/
  206. /* Polling handling */
  207. typedef struct PollingEntry {
  208. PollingFunc *func;
  209. void *opaque;
  210. struct PollingEntry *next;
  211. } PollingEntry;
  212. static PollingEntry *first_polling_entry;
  213. int qemu_add_polling_cb(PollingFunc *func, void *opaque)
  214. {
  215. PollingEntry **ppe, *pe;
  216. pe = g_malloc0(sizeof(PollingEntry));
  217. pe->func = func;
  218. pe->opaque = opaque;
  219. for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next);
  220. *ppe = pe;
  221. return 0;
  222. }
  223. void qemu_del_polling_cb(PollingFunc *func, void *opaque)
  224. {
  225. PollingEntry **ppe, *pe;
  226. for(ppe = &first_polling_entry; *ppe != NULL; ppe = &(*ppe)->next) {
  227. pe = *ppe;
  228. if (pe->func == func && pe->opaque == opaque) {
  229. *ppe = pe->next;
  230. g_free(pe);
  231. break;
  232. }
  233. }
  234. }
  235. /***********************************************************/
  236. /* Wait objects support */
  237. typedef struct WaitObjects {
  238. int num;
  239. int revents[MAXIMUM_WAIT_OBJECTS + 1];
  240. HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
  241. WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
  242. void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
  243. } WaitObjects;
  244. static WaitObjects wait_objects = {0};
  245. int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
  246. {
  247. WaitObjects *w = &wait_objects;
  248. if (w->num >= MAXIMUM_WAIT_OBJECTS) {
  249. return -1;
  250. }
  251. w->events[w->num] = handle;
  252. w->func[w->num] = func;
  253. w->opaque[w->num] = opaque;
  254. w->revents[w->num] = 0;
  255. w->num++;
  256. return 0;
  257. }
  258. void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
  259. {
  260. int i, found;
  261. WaitObjects *w = &wait_objects;
  262. found = 0;
  263. for (i = 0; i < w->num; i++) {
  264. if (w->events[i] == handle) {
  265. found = 1;
  266. }
  267. if (found) {
  268. w->events[i] = w->events[i + 1];
  269. w->func[i] = w->func[i + 1];
  270. w->opaque[i] = w->opaque[i + 1];
  271. w->revents[i] = w->revents[i + 1];
  272. }
  273. }
  274. if (found) {
  275. w->num--;
  276. }
  277. }
/*
 * Associate socket @fd's network events with the main AioContext's
 * notifier event object, so that socket activity wakes the main-loop
 * wait (WaitForMultipleObjects via qemu_poll_ns).
 */
void qemu_fd_register(int fd)
{
    WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
                   FD_READ | FD_ACCEPT | FD_CLOSE |
                   FD_CONNECT | FD_WRITE | FD_OOB);
}
  284. static int pollfds_fill(GArray *pollfds, fd_set *rfds, fd_set *wfds,
  285. fd_set *xfds)
  286. {
  287. int nfds = -1;
  288. int i;
  289. for (i = 0; i < pollfds->len; i++) {
  290. GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
  291. int fd = pfd->fd;
  292. int events = pfd->events;
  293. if (events & G_IO_IN) {
  294. FD_SET(fd, rfds);
  295. nfds = MAX(nfds, fd);
  296. }
  297. if (events & G_IO_OUT) {
  298. FD_SET(fd, wfds);
  299. nfds = MAX(nfds, fd);
  300. }
  301. if (events & G_IO_PRI) {
  302. FD_SET(fd, xfds);
  303. nfds = MAX(nfds, fd);
  304. }
  305. }
  306. return nfds;
  307. }
  308. static void pollfds_poll(GArray *pollfds, int nfds, fd_set *rfds,
  309. fd_set *wfds, fd_set *xfds)
  310. {
  311. int i;
  312. for (i = 0; i < pollfds->len; i++) {
  313. GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
  314. int fd = pfd->fd;
  315. int revents = 0;
  316. if (FD_ISSET(fd, rfds)) {
  317. revents |= G_IO_IN;
  318. }
  319. if (FD_ISSET(fd, wfds)) {
  320. revents |= G_IO_OUT;
  321. }
  322. if (FD_ISSET(fd, xfds)) {
  323. revents |= G_IO_PRI;
  324. }
  325. pfd->revents = revents & pfd->events;
  326. }
  327. }
/*
 * Win32 host wait: run the registered polling callbacks, emulate
 * poll() on sockets with a zero-timeout select(), then block in
 * qemu_poll_ns() on the glib fds plus the registered wait-object
 * HANDLEs.  The iothread lock and replay mutex are dropped around the
 * blocking wait.
 *
 * Returns non-zero when any event source reported activity.
 */
static int os_host_main_loop_wait(int64_t timeout)
{
    GMainContext *context = g_main_context_default();
    GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
    int select_ret = 0;
    int g_poll_ret, ret, i, n_poll_fds;
    PollingEntry *pe;
    WaitObjects *w = &wait_objects;
    gint poll_timeout;
    int64_t poll_timeout_ns;
    static struct timeval tv0;   /* zero-initialized: non-blocking select() */
    fd_set rfds, wfds, xfds;
    int nfds;

    g_main_context_acquire(context);

    /* XXX: need to suppress polling by better using win32 events */
    ret = 0;
    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
        ret |= pe->func(pe->opaque);
    }
    if (ret != 0) {
        /* A polling callback made progress; skip the blocking wait. */
        g_main_context_release(context);
        return ret;
    }

    /* Non-blocking select() pass over the socket fds in gpollfds. */
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&xfds);
    nfds = pollfds_fill(gpollfds, &rfds, &wfds, &xfds);
    if (nfds >= 0) {
        select_ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
        if (select_ret != 0) {
            /* Sockets ready (or select error): don't block below. */
            timeout = 0;
        }
        if (select_ret > 0) {
            pollfds_poll(gpollfds, nfds, &rfds, &wfds, &xfds);
        }
    }

    /* Collect glib's fds, then append one pseudo-fd per wait HANDLE so
     * a single qemu_poll_ns() covers both.
     */
    g_main_context_prepare(context, &max_priority);
    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
                                      poll_fds, ARRAY_SIZE(poll_fds));
    g_assert(n_poll_fds + w->num <= ARRAY_SIZE(poll_fds));

    for (i = 0; i < w->num; i++) {
        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
        poll_fds[n_poll_fds + i].events = G_IO_IN;
    }

    /* Merge glib's timeout (ms, -1 == infinite) with ours (ns). */
    if (poll_timeout < 0) {
        poll_timeout_ns = -1;
    } else {
        poll_timeout_ns = (int64_t)poll_timeout * (int64_t)SCALE_MS;
    }

    poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);

    qemu_mutex_unlock_iothread();

    replay_mutex_unlock();

    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);

    replay_mutex_lock();
    qemu_mutex_lock_iothread();

    if (g_poll_ret > 0) {
        /* Latch all revents first, then run the callbacks.
         * NOTE(review): presumably split into two loops so results are
         * recorded before any callback can mutate the table — confirm.
         */
        for (i = 0; i < w->num; i++) {
            w->revents[i] = poll_fds[n_poll_fds + i].revents;
        }
        for (i = 0; i < w->num; i++) {
            if (w->revents[i] && w->func[i]) {
                w->func[i](w->opaque[i]);
            }
        }
    }

    /* Dispatch glib sources; only the first n_poll_fds entries are
     * glib's own fds.
     */
    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
        g_main_context_dispatch(context);
    }

    g_main_context_release(context);

    return select_ret || g_poll_ret;
}
  399. #endif
  400. static NotifierList main_loop_poll_notifiers =
  401. NOTIFIER_LIST_INITIALIZER(main_loop_poll_notifiers);
/* Register @notify to be called when main_loop_wait() fills its pollfds
 * (MAIN_LOOP_POLL_FILL) and again after the wait completes.
 */
void main_loop_poll_add_notifier(Notifier *notify)
{
    notifier_list_add(&main_loop_poll_notifiers, notify);
}
/* Unregister a notifier added with main_loop_poll_add_notifier(). */
void main_loop_poll_remove_notifier(Notifier *notify)
{
    notifier_remove(notify);
}
/*
 * Run one iteration of the main loop.
 *
 * @nonblocking: when non-zero, poll with a zero timeout instead of
 *               sleeping until the next event/timer deadline.
 *
 * Notifies main_loop_poll_notifiers twice: first with
 * MAIN_LOOP_POLL_FILL so listeners can add fds and shrink the timeout,
 * then with MAIN_LOOP_POLL_OK/ERR after the host wait.  Finally runs
 * expired timers.
 */
void main_loop_wait(int nonblocking)
{
    MainLoopPoll mlpoll = {
        .state = MAIN_LOOP_POLL_FILL,
        .timeout = UINT32_MAX,   /* UINT32_MAX means "no timeout set" */
        .pollfds = gpollfds,
    };
    int ret;
    int64_t timeout_ns;

    if (nonblocking) {
        mlpoll.timeout = 0;
    }

    /* poll any events */
    g_array_set_size(gpollfds, 0); /* reset for new iteration */
    /* XXX: separate device handlers from system ones */
    notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);

    /* Convert the notifiers' ms timeout to ns (-1 == infinite). */
    if (mlpoll.timeout == UINT32_MAX) {
        timeout_ns = -1;
    } else {
        timeout_ns = (uint64_t)mlpoll.timeout * (int64_t)(SCALE_MS);
    }

    /* Never sleep past the next timer deadline. */
    timeout_ns = qemu_soonest_timeout(timeout_ns,
                                      timerlistgroup_deadline_ns(
                                          &main_loop_tlg));

    ret = os_host_main_loop_wait(timeout_ns);
    mlpoll.state = ret < 0 ? MAIN_LOOP_POLL_ERR : MAIN_LOOP_POLL_OK;
    notifier_list_notify(&main_loop_poll_notifiers, &mlpoll);

    /* CPU thread can infinitely wait for event after
       missing the warp */
    qemu_start_warp_timer();
    qemu_clock_run_all_timers();
}
  442. /* Functions to operate on the main QEMU AioContext. */
/* Create a bottom half attached to the main-loop AioContext. */
QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
{
    return aio_bh_new(qemu_aio_context, cb, opaque);
}
  447. /*
  448. * Functions to operate on the I/O handler AioContext.
  449. * This context runs on top of main loop. We can't reuse qemu_aio_context
  450. * because iohandlers mustn't be polled by aio_poll(qemu_aio_context).
  451. */
  452. static AioContext *iohandler_ctx;
  453. static void iohandler_init(void)
  454. {
  455. if (!iohandler_ctx) {
  456. iohandler_ctx = aio_context_new(&error_abort);
  457. }
  458. }
/* Return the iohandler AioContext, creating it on first use. */
AioContext *iohandler_get_aio_context(void)
{
    iohandler_init();
    return iohandler_ctx;
}
/* Return a GSource wrapping the iohandler AioContext; per the usage in
 * qemu_init_main_loop(), the caller owns the returned reference.
 */
GSource *iohandler_get_g_source(void)
{
    iohandler_init();
    return aio_get_g_source(iohandler_ctx);
}
/*
 * Register @fd_read/@fd_write callbacks for @fd on the iohandler
 * AioContext — deliberately NOT the main AioContext, so that
 * aio_poll(qemu_aio_context) never polls these fds (see the comment
 * above iohandler_ctx).
 */
void qemu_set_fd_handler(int fd,
                         IOHandler *fd_read,
                         IOHandler *fd_write,
                         void *opaque)
{
    iohandler_init();
    aio_set_fd_handler(iohandler_ctx, fd, false,
                       fd_read, fd_write, NULL, opaque);
}
/* Attach @handler to EventNotifier @e via the iohandler AioContext. */
void event_notifier_set_handler(EventNotifier *e,
                                EventNotifierHandler *handler)
{
    iohandler_init();
    aio_set_event_notifier(iohandler_ctx, e, false,
                           handler, NULL);
}
  485. /* reaping of zombies. right now we're not passing the status to
  486. anyone, but it would be possible to add a callback. */
  487. #ifndef _WIN32
  488. typedef struct ChildProcessRecord {
  489. int pid;
  490. QLIST_ENTRY(ChildProcessRecord) next;
  491. } ChildProcessRecord;
  492. static QLIST_HEAD(, ChildProcessRecord) child_watches =
  493. QLIST_HEAD_INITIALIZER(child_watches);
  494. static QEMUBH *sigchld_bh;
/* SIGCHLD handler: defer the actual reaping to a bottom half so the
 * handler itself stays minimal.
 */
static void sigchld_handler(int signal)
{
    qemu_bh_schedule(sigchld_bh);
}
/* Bottom half that reaps any exited children on the watch list and
 * drops their records.  WNOHANG ensures it never blocks on children
 * that are still running.
 */
static void sigchld_bh_handler(void *opaque)
{
    ChildProcessRecord *rec, *next;

    QLIST_FOREACH_SAFE(rec, &child_watches, next, next) {
        if (waitpid(rec->pid, NULL, WNOHANG) == rec->pid) {
            QLIST_REMOVE(rec, next);
            g_free(rec);
        }
    }
}
/* Install the SIGCHLD handler and its bottom half; called lazily from
 * qemu_add_child_watch() on first use.
 */
static void qemu_init_child_watch(void)
{
    struct sigaction act;
    sigchld_bh = qemu_bh_new(sigchld_bh_handler, NULL);

    memset(&act, 0, sizeof(act));
    act.sa_handler = sigchld_handler;
    act.sa_flags = SA_NOCLDSTOP;   /* no signal on child stop/continue */
    sigaction(SIGCHLD, &act, NULL);
}
/*
 * Start watching child process @pid so it gets reaped when it exits.
 *
 * Returns 1 if @pid was already being watched, 0 when a new watch was
 * added.  Installs the SIGCHLD machinery on first call.
 */
int qemu_add_child_watch(pid_t pid)
{
    ChildProcessRecord *rec;

    if (!sigchld_bh) {
        qemu_init_child_watch();
    }

    QLIST_FOREACH(rec, &child_watches, next) {
        if (rec->pid == pid) {
            return 1;
        }
    }
    rec = g_malloc0(sizeof(ChildProcessRecord));
    rec->pid = pid;
    QLIST_INSERT_HEAD(&child_watches, rec, next);
    return 0;
}
  534. #endif