/*
 * urcu-mb.c
 *
 * Userspace RCU library with explicit memory barriers
 *
 * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
 * Copyright 2015 Red Hat, Inc.
 *
 * Ported to QEMU by Paolo Bonzini <pbonzini@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * IBM's contributions to this file may be relicensed under LGPLv2 or later.
 */

#include "qemu/osdep.h"
#include "qemu/rcu.h"
#include "qemu/atomic.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#if defined(CONFIG_MALLOC_TRIM)
#include <malloc.h>
#endif

/*
 * Global grace period counter.  Bit 0 is always one in rcu_gp_ctr.
 * Bits 1 and above are defined in synchronize_rcu.
 */
#define RCU_GP_LOCKED           (1UL << 0)
#define RCU_GP_CTR              (1UL << 1)

unsigned long rcu_gp_ctr = RCU_GP_LOCKED;

QemuEvent rcu_gp_event;
static QemuMutex rcu_registry_lock;
static QemuMutex rcu_sync_lock;
/*
 * Check whether a quiescent state was crossed between the beginning of
 * wait_for_readers() and now.
 */
static inline int rcu_gp_ongoing(unsigned long *ctr)
{
    unsigned long v;

    v = atomic_read(ctr);
    return v && (v != rcu_gp_ctr);
}
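
/*
 * For reference, the reader side that rcu_gp_ongoing() checks against is the
 * rcu_read_lock()/rcu_read_unlock() pair from include/qemu/rcu.h.  A minimal
 * sketch of the non-nested fast path (simplified, not the authoritative
 * implementation) looks roughly like this:
 *
 *     rcu_read_lock:
 *         ctr = atomic_read(&rcu_gp_ctr);
 *         atomic_set(&rcu_reader.ctr, ctr);
 *         smp_mb_placeholder();          // order the ctr store before reads
 *                                        // of RCU-protected pointers
 *
 *     rcu_read_unlock:
 *         atomic_set(&rcu_reader.ctr, 0);
 *         smp_mb_placeholder();          // pairs with smp_mb_global() in
 *                                        // wait_for_readers() below
 *         if (atomic_read(&rcu_reader.waiting)) {
 *             atomic_set(&rcu_reader.waiting, false);
 *             qemu_event_set(&rcu_gp_event);
 *         }
 *
 * With rcu_reader.ctr == 0 the thread is outside any critical section, so
 * rcu_gp_ongoing() returns false for it; a non-zero value equal to the
 * current rcu_gp_ctr means the critical section began after the grace
 * period started and need not be waited for.
 */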

/* Written to only by each individual reader. Read by both the reader and the
 * writers.
 */
__thread struct rcu_reader_data rcu_reader;

/* Protected by rcu_registry_lock. */
typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);

/* Wait for previous parity/grace period to be empty of readers. */
static void wait_for_readers(void)
{
    ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders);
    struct rcu_reader_data *index, *tmp;

    for (;;) {
        /* We want to be notified of changes made to rcu_gp_ongoing
         * while we walk the list.
         */
        qemu_event_reset(&rcu_gp_event);

        /* Instead of using atomic_mb_set for index->waiting, and
         * atomic_mb_read for index->ctr, memory barriers are placed
         * manually since writes to different threads are independent.
         * qemu_event_reset has acquire semantics, so no memory barrier
         * is needed here.
         */
        QLIST_FOREACH(index, &registry, node) {
            atomic_set(&index->waiting, true);
        }

        /* Here, order the stores to index->waiting before the loads of
         * index->ctr.  Pairs with smp_mb_placeholder() in rcu_read_unlock(),
         * ensuring that the loads of index->ctr are sequentially consistent.
         */
        smp_mb_global();

        QLIST_FOREACH_SAFE(index, &registry, node, tmp) {
            if (!rcu_gp_ongoing(&index->ctr)) {
                QLIST_REMOVE(index, node);
                QLIST_INSERT_HEAD(&qsreaders, index, node);

                /* No need for mb_set here, worst of all we
                 * get some extra futex wakeups.
                 */
                atomic_set(&index->waiting, false);
            }
        }

        if (QLIST_EMPTY(&registry)) {
            break;
        }

        /* Wait for one thread to report a quiescent state and try again.
         * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't
         * wait too much time.
         *
         * rcu_register_thread() may add nodes to &registry; it will not
         * wake up synchronize_rcu, but that is okay because at least another
         * thread must exit its RCU read-side critical section before
         * synchronize_rcu is done.  The next iteration of the loop will
         * move the new thread's rcu_reader from &registry to &qsreaders,
         * because rcu_gp_ongoing() will return false.
         *
         * rcu_unregister_thread() may remove nodes from &qsreaders instead
         * of &registry if it runs during qemu_event_wait.  That's okay;
         * the node then will not be added back to &registry by QLIST_SWAP
         * below.  The invariant is that the node is part of one list when
         * rcu_registry_lock is released.
         */
        qemu_mutex_unlock(&rcu_registry_lock);
        qemu_event_wait(&rcu_gp_event);
        qemu_mutex_lock(&rcu_registry_lock);
    }

    /* put back the reader list in the registry */
    QLIST_SWAP(&registry, &qsreaders, node);
}

void synchronize_rcu(void)
{
    qemu_mutex_lock(&rcu_sync_lock);

    /* Write RCU-protected pointers before reading p_rcu_reader->ctr.
     * Pairs with smp_mb_placeholder() in rcu_read_lock().
     */
    smp_mb_global();

    qemu_mutex_lock(&rcu_registry_lock);
    if (!QLIST_EMPTY(&registry)) {
        /* In either case, the atomic_mb_set below blocks stores that free
         * old RCU-protected pointers.
         */
        if (sizeof(rcu_gp_ctr) < 8) {
            /* For architectures with 32-bit longs, a two-subphases algorithm
             * ensures we do not encounter overflow bugs.
             *
             * Switch parity: 0 -> 1, 1 -> 0.
             */
            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
            wait_for_readers();
            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
        } else {
            /* Increment current grace period.  */
            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
        }

        wait_for_readers();
    }
    qemu_mutex_unlock(&rcu_registry_lock);
    qemu_mutex_unlock(&rcu_sync_lock);
}
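
/*
 * Typical writer-side usage of synchronize_rcu(): publish a new version of
 * an RCU-protected structure, wait for a grace period, then free the old
 * copy.  A minimal sketch; the names Config/global_cfg/new_cfg are
 * hypothetical and not part of this file:
 *
 *     Config *old = atomic_rcu_read(&global_cfg);
 *     atomic_rcu_set(&global_cfg, new_cfg);   // new readers see new_cfg
 *     synchronize_rcu();                      // wait for pre-existing readers
 *     g_free(old);                            // no reader can still hold it
 *
 * When blocking is undesirable, call_rcu1() below defers the reclamation to
 * the call_rcu thread instead.
 */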

#define RCU_CALL_MIN_SIZE        30

/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h
 * from liburcu.  Note that head is only used by the consumer.
 */
static struct rcu_head dummy;
static struct rcu_head *head = &dummy, **tail = &dummy.next;
static int rcu_call_count;
static QemuEvent rcu_call_ready_event;

static void enqueue(struct rcu_head *node)
{
    struct rcu_head **old_tail;

    node->next = NULL;
    old_tail = atomic_xchg(&tail, &node->next);
    atomic_mb_set(old_tail, node);
}

static struct rcu_head *try_dequeue(void)
{
    struct rcu_head *node, *next;

retry:
    /* Test for an empty list, which we do not expect.  Note that for
     * the consumer, head and tail are always consistent.  The head
     * is consistent because only the consumer reads/writes it; the
     * tail is consistent because updating it is the very first step
     * of enqueuing.  Only the next pointers can be transiently
     * inconsistent.
     */
    if (head == &dummy && atomic_mb_read(&tail) == &dummy.next) {
        abort();
    }

    /* If the head node has NULL in its next pointer, the value is
     * wrong and we need to wait until its enqueuer finishes the update.
     */
    node = head;
    next = atomic_mb_read(&head->next);
    if (!next) {
        return NULL;
    }

    /* Since we are the sole consumer, and we excluded the empty case
     * above, the queue will always have at least two nodes: the
     * dummy node, and the one being removed.  So we do not need to update
     * the tail pointer.
     */
    head = next;

    /* If we dequeued the dummy node, add it back at the end and retry. */
    if (node == &dummy) {
        enqueue(node);
        goto retry;
    }

    return node;
}
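
/*
 * Illustration of the transient state that try_dequeue() tolerates.  Suppose
 * the queue holds only the dummy node and a producer runs enqueue(A)
 * concurrently with the consumer:
 *
 *     producer: A->next = NULL;
 *     producer: old_tail = atomic_xchg(&tail, &A->next);  // tail now points into A
 *     --- consumer runs here: head == &dummy, head->next is still NULL,
 *         so try_dequeue() returns NULL and call_rcu_thread waits/retries ---
 *     producer: atomic_mb_set(old_tail, A);               // dummy.next = A, link visible
 *
 * Only the next pointer is ever in this half-updated state; head and tail
 * themselves stay consistent from the single consumer's point of view.
 */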

static void *call_rcu_thread(void *opaque)
{
    struct rcu_head *node;

    rcu_register_thread();

    for (;;) {
        int tries = 0;
        int n = atomic_read(&rcu_call_count);

        /* Heuristically wait for a decent number of callbacks to pile up.
         * Fetch rcu_call_count now, since we only need to process elements
         * that were added before synchronize_rcu() starts.
         */
        while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) {
            g_usleep(10000);
            if (n == 0) {
                qemu_event_reset(&rcu_call_ready_event);
                n = atomic_read(&rcu_call_count);
                if (n == 0) {
#if defined(CONFIG_MALLOC_TRIM)
                    malloc_trim(4 * 1024 * 1024);
#endif
                    qemu_event_wait(&rcu_call_ready_event);
                }
            }
            n = atomic_read(&rcu_call_count);
        }

        atomic_sub(&rcu_call_count, n);
        synchronize_rcu();
        qemu_mutex_lock_iothread();
        while (n > 0) {
            node = try_dequeue();
            while (!node) {
                qemu_mutex_unlock_iothread();
                qemu_event_reset(&rcu_call_ready_event);
                node = try_dequeue();
                if (!node) {
                    qemu_event_wait(&rcu_call_ready_event);
                    node = try_dequeue();
                }
                qemu_mutex_lock_iothread();
            }

            n--;
            node->func(node);
        }
        qemu_mutex_unlock_iothread();
    }
    abort();
}

void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
{
    node->func = func;
    enqueue(node);
    atomic_inc(&rcu_call_count);
    qemu_event_set(&rcu_call_ready_event);
}
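
/*
 * Example of deferring reclamation through call_rcu1() instead of blocking in
 * synchronize_rcu().  The Foo type and foo_reclaim() are hypothetical and
 * shown only as a sketch; callers usually go through the call_rcu() macro in
 * include/qemu/rcu.h, which wraps this same pattern:
 *
 *     struct Foo {
 *         struct rcu_head rcu;
 *         int value;
 *     };
 *
 *     static void foo_reclaim(struct rcu_head *head)
 *     {
 *         struct Foo *foo = container_of(head, struct Foo, rcu);
 *         g_free(foo);
 *     }
 *
 *     // after unpublishing the old pointer:
 *     call_rcu1(&old_foo->rcu, foo_reclaim);  // freed after a grace period
 */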

void rcu_register_thread(void)
{
    assert(rcu_reader.ctr == 0);
    qemu_mutex_lock(&rcu_registry_lock);
    QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
    qemu_mutex_unlock(&rcu_registry_lock);
}

void rcu_unregister_thread(void)
{
    qemu_mutex_lock(&rcu_registry_lock);
    QLIST_REMOVE(&rcu_reader, node);
    qemu_mutex_unlock(&rcu_registry_lock);
}
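
/*
 * Any thread that enters RCU read-side critical sections must be present in
 * the registry above, otherwise synchronize_rcu() cannot wait for it.  A
 * minimal sketch of such a thread (worker_func and running are hypothetical):
 *
 *     static void *worker_func(void *opaque)
 *     {
 *         rcu_register_thread();
 *         while (running) {
 *             rcu_read_lock();
 *             ...dereference RCU-protected pointers...
 *             rcu_read_unlock();
 *         }
 *         rcu_unregister_thread();
 *         return NULL;
 *     }
 */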

static void rcu_init_complete(void)
{
    QemuThread thread;

    qemu_mutex_init(&rcu_registry_lock);
    qemu_mutex_init(&rcu_sync_lock);
    qemu_event_init(&rcu_gp_event, true);

    qemu_event_init(&rcu_call_ready_event, false);

    /* The caller is assumed to have iothread lock, so the call_rcu thread
     * must have been quiescent even after forking, just recreate it.
     */
    qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
                       NULL, QEMU_THREAD_DETACHED);

    rcu_register_thread();
}

static int atfork_depth = 1;

void rcu_enable_atfork(void)
{
    atfork_depth++;
}

void rcu_disable_atfork(void)
{
    atfork_depth--;
}

#ifdef CONFIG_POSIX
static void rcu_init_lock(void)
{
    if (atfork_depth < 1) {
        return;
    }

    qemu_mutex_lock(&rcu_sync_lock);
    qemu_mutex_lock(&rcu_registry_lock);
}

static void rcu_init_unlock(void)
{
    if (atfork_depth < 1) {
        return;
    }

    qemu_mutex_unlock(&rcu_registry_lock);
    qemu_mutex_unlock(&rcu_sync_lock);
}

static void rcu_init_child(void)
{
    if (atfork_depth < 1) {
        return;
    }

    memset(&registry, 0, sizeof(registry));
    rcu_init_complete();
}
#endif

static void __attribute__((__constructor__)) rcu_init(void)
{
    smp_mb_global_init();
#ifdef CONFIG_POSIX
    pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_child);
#endif
    rcu_init_complete();
}