qsp.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815
  1. /*
  2. * qsp.c - QEMU Synchronization Profiler
  3. *
  4. * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
  5. *
  6. * License: GNU GPL, version 2 or later.
  7. * See the COPYING file in the top-level directory.
  8. *
  9. * QSP profiles the time spent in synchronization primitives, which can
  10. * help diagnose performance problems, e.g. scalability issues when
  11. * contention is high.
  12. *
  13. * The primitives currently supported are mutexes, recursive mutexes and
  14. * condition variables. Note that not all related functions are intercepted;
  15. * instead we profile only those functions that can have a performance impact,
  16. * either due to blocking (e.g. cond_wait, mutex_lock) or cache line
  17. * contention (e.g. mutex_lock, mutex_trylock).
  18. *
  19. * QSP's design focuses on speed and scalability. This is achieved
  20. * by having threads do their profiling entirely on thread-local data.
  21. * The appropriate thread-local data is found via a QHT, i.e. a concurrent hash
  22. * table. To aggregate data in order to generate a report, we iterate over
  23. * all entries in the hash table. Depending on the number of threads and
  24. * synchronization objects this might be expensive, but note that it is
  25. * very rarely called -- reports are generated only when requested by users.
  26. *
  27. * Reports are generated as a table where each row represents a call site. A
  28. * call site is the triplet formed by the __file__ and __LINE__ of the caller
  29. * as well as the address of the "object" (i.e. mutex, rec. mutex or condvar)
  30. * being operated on. Optionally, call sites that operate on different objects
  31. * of the same type can be coalesced, which can be particularly useful when
  32. * profiling dynamically-allocated objects.
  33. *
  34. * Alternative designs considered:
  35. *
  36. * - Use an off-the-shelf profiler such as mutrace. This is not a viable option
  37. * for us because QEMU has __malloc_hook set (by one of the libraries it
  38. * uses); leaving this hook unset is required to avoid deadlock in mutrace.
  39. *
  40. * - Use a glib HT for each thread, protecting each HT with its own lock.
  41. * This isn't simpler than the current design, and is 10% slower in the
  42. * atomic_add-bench microbenchmark (-m option).
  43. *
  44. * - For reports, just use a binary tree as we aggregate data, instead of having
  45. * an intermediate hash table. This would simplify the code only slightly, but
  46. * would perform badly if there were many threads and objects to track.
  47. *
  48. * - Wrap operations on qsp entries with RCU read-side critical sections, so
  49. * that qsp_reset() can delete entries. Unfortunately, the overhead of calling
  50. * rcu_read_lock/unlock slows down atomic_add-bench -m by 24%. Having
  51. * a snapshot that is updated on qsp_reset() avoids this overhead.
  52. *
  53. * Related Work:
  54. * - Lennart Poettering's mutrace: http://0pointer.de/blog/projects/mutrace.html
  55. * - Lozi, David, Thomas, Lawall and Muller. "Remote Core Locking: Migrating
  56. * Critical-Section Execution to Improve the Performance of Multithreaded
  57. * Applications", USENIX ATC'12.
  58. */
  59. #include "qemu/osdep.h"
  60. #include "qemu/qemu-print.h"
  61. #include "qemu/thread.h"
  62. #include "qemu/timer.h"
  63. #include "qemu/qht.h"
  64. #include "qemu/rcu.h"
  65. #include "qemu/xxhash.h"
/* Categories of synchronization primitive tracked by the profiler */
enum QSPType {
    QSP_MUTEX,      /* plain QemuMutex */
    QSP_BQL_MUTEX,  /* BQL mutex; reported separately from other mutexes */
    QSP_REC_MUTEX,  /* recursive mutex */
    QSP_CONDVAR,    /* condition variable */
};
/*
 * A call site: the (object, file, line) triplet at which a primitive is
 * operated on, plus the primitive's type. Interned in qsp_callsite_ht so
 * that all threads share a single instance per site.
 */
struct QSPCallSite {
    const void *obj;     /* address of the mutex/rec. mutex/condvar */
    const char *file;    /* i.e. __FILE__; shortened later */
    int line;            /* i.e. __LINE__ of the caller */
    enum QSPType type;
};
typedef struct QSPCallSite QSPCallSite;
/*
 * Per-(thread, call site) profiling counters. Entries in the global table
 * are written only by their owning thread; see do_qsp_entry_record().
 */
struct QSPEntry {
    void *thread_ptr;            /* address of the owner's qsp_thread TLS var */
    const QSPCallSite *callsite; /* interned; see qsp_callsite_find() */
    uint64_t n_acqs;             /* number of successful acquisitions */
    uint64_t ns;                 /* total ns spent in the intercepted calls */
    unsigned int n_objs; /* count of coalesced objs; only used for reporting */
};
typedef struct QSPEntry QSPEntry;
/*
 * Snapshot of aggregated counters taken at qsp_reset() time; reports then
 * subtract the snapshot. Reclaimed via call_rcu (see qsp_snapshot_destroy).
 */
struct QSPSnapshot {
    struct rcu_head rcu; /* for deferred reclamation via call_rcu */
    struct qht ht;       /* aggregated (no-thread) entries */
};
typedef struct QSPSnapshot QSPSnapshot;
/* initial sizing for hash tables */
#define QSP_INITIAL_SIZE 64

/* If this file is moved, QSP_REL_PATH should be updated accordingly */
#define QSP_REL_PATH "util/qsp.c"

/* this file's full path. Used to present all call sites with relative paths */
static size_t qsp_qemu_path_len;

/* the address of qsp_thread gives us a unique 'thread ID' */
static __thread int qsp_thread;

/*
 * Call sites are the same for all threads, so we track them in a separate hash
 * table to save memory.
 */
static struct qht qsp_callsite_ht;

/* global table of per-(thread, call site) entries; entries are never removed */
static struct qht qsp_ht;

/* latest snapshot installed by qsp_reset(); read under RCU */
static QSPSnapshot *qsp_snapshot;

/* one-time init handshake; see qsp_init__slowpath() */
static bool qsp_initialized, qsp_initializing;
/* human-readable names for the report's "Type" column, indexed by QSPType */
static const char * const qsp_typenames[] = {
    [QSP_MUTEX]     = "mutex",
    [QSP_BQL_MUTEX] = "BQL mutex",
    [QSP_REC_MUTEX] = "rec_mutex",
    [QSP_CONDVAR]   = "condvar",
};
/*
 * Indirection points flipped by qsp_enable()/qsp_disable(): they target
 * either the plain *_impl functions (the default, below) or the profiled
 * qsp_* wrappers generated in this file.
 */
QemuMutexLockFunc qemu_bql_mutex_lock_func = qemu_mutex_lock_impl;
QemuMutexLockFunc qemu_mutex_lock_func = qemu_mutex_lock_impl;
QemuMutexTrylockFunc qemu_mutex_trylock_func = qemu_mutex_trylock_impl;
QemuRecMutexLockFunc qemu_rec_mutex_lock_func = qemu_rec_mutex_lock_impl;
QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func =
    qemu_rec_mutex_trylock_impl;
QemuCondWaitFunc qemu_cond_wait_func = qemu_cond_wait_impl;
QemuCondTimedWaitFunc qemu_cond_timedwait_func = qemu_cond_timedwait_impl;
  122. /*
  123. * It pays off to _not_ hash callsite->file; hashing a string is slow, and
  124. * without it we still get a pretty unique hash.
  125. */
  126. static inline
  127. uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t ab)
  128. {
  129. uint64_t cd = (uint64_t)(uintptr_t)callsite->obj;
  130. uint32_t e = callsite->line;
  131. uint32_t f = callsite->type;
  132. return qemu_xxhash6(ab, cd, e, f);
  133. }
  134. static inline
  135. uint32_t qsp_callsite_hash(const QSPCallSite *callsite)
  136. {
  137. return do_qsp_callsite_hash(callsite, 0);
  138. }
  139. static inline uint32_t do_qsp_entry_hash(const QSPEntry *entry, uint64_t a)
  140. {
  141. return do_qsp_callsite_hash(entry->callsite, a);
  142. }
  143. static uint32_t qsp_entry_hash(const QSPEntry *entry)
  144. {
  145. return do_qsp_entry_hash(entry, (uint64_t)(uintptr_t)entry->thread_ptr);
  146. }
  147. static uint32_t qsp_entry_no_thread_hash(const QSPEntry *entry)
  148. {
  149. return do_qsp_entry_hash(entry, 0);
  150. }
  151. /* without the objects we need to hash the file name to get a decent hash */
  152. static uint32_t qsp_entry_no_thread_obj_hash(const QSPEntry *entry)
  153. {
  154. const QSPCallSite *callsite = entry->callsite;
  155. uint64_t ab = g_str_hash(callsite->file);
  156. uint64_t cd = callsite->line;
  157. uint32_t e = callsite->type;
  158. return qemu_xxhash5(ab, cd, e);
  159. }
  160. static bool qsp_callsite_cmp(const void *ap, const void *bp)
  161. {
  162. const QSPCallSite *a = ap;
  163. const QSPCallSite *b = bp;
  164. return a == b ||
  165. (a->obj == b->obj &&
  166. a->line == b->line &&
  167. a->type == b->type &&
  168. (a->file == b->file || !strcmp(a->file, b->file)));
  169. }
  170. static bool qsp_callsite_no_obj_cmp(const void *ap, const void *bp)
  171. {
  172. const QSPCallSite *a = ap;
  173. const QSPCallSite *b = bp;
  174. return a == b ||
  175. (a->line == b->line &&
  176. a->type == b->type &&
  177. (a->file == b->file || !strcmp(a->file, b->file)));
  178. }
  179. static bool qsp_entry_no_thread_cmp(const void *ap, const void *bp)
  180. {
  181. const QSPEntry *a = ap;
  182. const QSPEntry *b = bp;
  183. return qsp_callsite_cmp(a->callsite, b->callsite);
  184. }
  185. static bool qsp_entry_no_thread_obj_cmp(const void *ap, const void *bp)
  186. {
  187. const QSPEntry *a = ap;
  188. const QSPEntry *b = bp;
  189. return qsp_callsite_no_obj_cmp(a->callsite, b->callsite);
  190. }
  191. static bool qsp_entry_cmp(const void *ap, const void *bp)
  192. {
  193. const QSPEntry *a = ap;
  194. const QSPEntry *b = bp;
  195. return a->thread_ptr == b->thread_ptr &&
  196. qsp_callsite_cmp(a->callsite, b->callsite);
  197. }
/*
 * Normally we'd call this from a constructor function, but we want it to work
 * via libutil as well.
 */
static void qsp_do_init(void)
{
    /* make sure this file's path in the tree is up to date with QSP_REL_PATH */
    g_assert(strstr(__FILE__, QSP_REL_PATH));
    /* length of the build-tree prefix to strip when printing call sites */
    qsp_qemu_path_len = strlen(__FILE__) - strlen(QSP_REL_PATH);

    /*
     * NOTE(review): RAW_MUTEXES presumably keeps the hash tables' internal
     * locks out of the profiled (patchable) lock path — confirm against qht.
     */
    qht_init(&qsp_ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
    qht_init(&qsp_callsite_ht, qsp_callsite_cmp, QSP_INITIAL_SIZE,
             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
}
/*
 * One-time initialization. The first thread to win the cmpxchg on
 * qsp_initializing performs the init and then publishes qsp_initialized;
 * concurrent losers spin until that flag is set.
 */
static __attribute__((noinline)) void qsp_init__slowpath(void)
{
    if (atomic_cmpxchg(&qsp_initializing, false, true) == false) {
        qsp_do_init();
        atomic_set(&qsp_initialized, true);
    } else {
        /* another thread is initializing; wait for it to finish */
        while (!atomic_read(&qsp_initialized)) {
            cpu_relax();
        }
    }
}
/* qsp_init() must be called from _all_ exported functions */
static inline void qsp_init(void)
{
    /* fast path: a single atomic load once initialization is done */
    if (likely(atomic_read(&qsp_initialized))) {
        return;
    }
    qsp_init__slowpath();
}
  231. static QSPCallSite *qsp_callsite_find(const QSPCallSite *orig)
  232. {
  233. QSPCallSite *callsite;
  234. uint32_t hash;
  235. hash = qsp_callsite_hash(orig);
  236. callsite = qht_lookup(&qsp_callsite_ht, orig, hash);
  237. if (callsite == NULL) {
  238. void *existing = NULL;
  239. callsite = g_new(QSPCallSite, 1);
  240. memcpy(callsite, orig, sizeof(*callsite));
  241. qht_insert(&qsp_callsite_ht, callsite, hash, &existing);
  242. if (unlikely(existing)) {
  243. g_free(callsite);
  244. callsite = existing;
  245. }
  246. }
  247. return callsite;
  248. }
  249. static QSPEntry *
  250. qsp_entry_create(struct qht *ht, const QSPEntry *entry, uint32_t hash)
  251. {
  252. QSPEntry *e;
  253. void *existing = NULL;
  254. e = g_new0(QSPEntry, 1);
  255. e->thread_ptr = entry->thread_ptr;
  256. e->callsite = qsp_callsite_find(entry->callsite);
  257. qht_insert(ht, e, hash, &existing);
  258. if (unlikely(existing)) {
  259. g_free(e);
  260. e = existing;
  261. }
  262. return e;
  263. }
  264. static QSPEntry *
  265. qsp_entry_find(struct qht *ht, const QSPEntry *entry, uint32_t hash)
  266. {
  267. QSPEntry *e;
  268. e = qht_lookup(ht, entry, hash);
  269. if (e == NULL) {
  270. e = qsp_entry_create(ht, entry, hash);
  271. }
  272. return e;
  273. }
/*
 * Note: Entries are never removed, so callers do not have to be in an RCU
 * read-side critical section.
 */
static QSPEntry *qsp_entry_get(const void *obj, const char *file, int line,
                               enum QSPType type)
{
    /* stack-built call site; qsp_entry_create() interns a heap copy */
    QSPCallSite callsite = {
        .obj = obj,
        .file = file,
        .line = line,
        .type = type,
    };
    QSPEntry orig;
    uint32_t hash;

    qsp_init();

    /* &qsp_thread (TLS) is this thread's unique identifier */
    orig.thread_ptr = &qsp_thread;
    orig.callsite = &callsite;

    hash = qsp_entry_hash(&orig);
    return qsp_entry_find(&qsp_ht, &orig, hash);
}
/*
 * @e is in the global hash table; it is only written to by the current thread,
 * so we write to it atomically (as in "write once") to prevent torn reads.
 *
 * @delta: nanoseconds spent in the intercepted call.
 * @acq: whether the call counts as a successful acquisition.
 */
static inline void do_qsp_entry_record(QSPEntry *e, int64_t delta, bool acq)
{
    atomic_set_u64(&e->ns, e->ns + delta);
    if (acq) {
        atomic_set_u64(&e->n_acqs, e->n_acqs + 1);
    }
}

/* record an unconditionally-successful acquisition taking @delta ns */
static inline void qsp_entry_record(QSPEntry *e, int64_t delta)
{
    do_qsp_entry_record(e, delta, true);
}
/*
 * Generate a profiled wrapper for a void-returning lock function: time the
 * call to impl_ and charge the elapsed time to this call site's entry.
 */
#define QSP_GEN_VOID(type_, qsp_t_, func_, impl_)                       \
    static void func_(type_ *obj, const char *file, int line)           \
    {                                                                   \
        QSPEntry *e;                                                    \
        int64_t t0, t1;                                                 \
                                                                        \
        t0 = get_clock();                                               \
        impl_(obj, file, line);                                         \
        t1 = get_clock();                                               \
                                                                        \
        e = qsp_entry_get(obj, file, line, qsp_t_);                     \
        qsp_entry_record(e, t1 - t0);                                   \
    }
/*
 * Like QSP_GEN_VOID, but for int-returning functions (e.g. trylock): the
 * wait time is always recorded, but only a zero return (success) is
 * counted as an acquisition.
 */
#define QSP_GEN_RET1(type_, qsp_t_, func_, impl_)                       \
    static int func_(type_ *obj, const char *file, int line)            \
    {                                                                   \
        QSPEntry *e;                                                    \
        int64_t t0, t1;                                                 \
        int err;                                                        \
                                                                        \
        t0 = get_clock();                                               \
        err = impl_(obj, file, line);                                   \
        t1 = get_clock();                                               \
                                                                        \
        e = qsp_entry_get(obj, file, line, qsp_t_);                     \
        do_qsp_entry_record(e, t1 - t0, !err);                          \
        return err;                                                     \
    }
/* instantiate the profiled wrappers installed by qsp_enable() */
QSP_GEN_VOID(QemuMutex, QSP_BQL_MUTEX, qsp_bql_mutex_lock, qemu_mutex_lock_impl)
QSP_GEN_VOID(QemuMutex, QSP_MUTEX, qsp_mutex_lock, qemu_mutex_lock_impl)
QSP_GEN_RET1(QemuMutex, QSP_MUTEX, qsp_mutex_trylock, qemu_mutex_trylock_impl)

QSP_GEN_VOID(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_lock,
             qemu_rec_mutex_lock_impl)
QSP_GEN_RET1(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_trylock,
             qemu_rec_mutex_trylock_impl)

#undef QSP_GEN_RET1
#undef QSP_GEN_VOID
  347. static void
  348. qsp_cond_wait(QemuCond *cond, QemuMutex *mutex, const char *file, int line)
  349. {
  350. QSPEntry *e;
  351. int64_t t0, t1;
  352. t0 = get_clock();
  353. qemu_cond_wait_impl(cond, mutex, file, line);
  354. t1 = get_clock();
  355. e = qsp_entry_get(cond, file, line, QSP_CONDVAR);
  356. qsp_entry_record(e, t1 - t0);
  357. }
  358. static bool
  359. qsp_cond_timedwait(QemuCond *cond, QemuMutex *mutex, int ms,
  360. const char *file, int line)
  361. {
  362. QSPEntry *e;
  363. int64_t t0, t1;
  364. bool ret;
  365. t0 = get_clock();
  366. ret = qemu_cond_timedwait_impl(cond, mutex, ms, file, line);
  367. t1 = get_clock();
  368. e = qsp_entry_get(cond, file, line, QSP_CONDVAR);
  369. qsp_entry_record(e, t1 - t0);
  370. return ret;
  371. }
/* enabled iff the mutex-lock pointer currently targets the profiled wrapper */
bool qsp_is_enabled(void)
{
    return atomic_read(&qemu_mutex_lock_func) == qsp_mutex_lock;
}
  376. void qsp_enable(void)
  377. {
  378. atomic_set(&qemu_mutex_lock_func, qsp_mutex_lock);
  379. atomic_set(&qemu_mutex_trylock_func, qsp_mutex_trylock);
  380. atomic_set(&qemu_bql_mutex_lock_func, qsp_bql_mutex_lock);
  381. atomic_set(&qemu_rec_mutex_lock_func, qsp_rec_mutex_lock);
  382. atomic_set(&qemu_rec_mutex_trylock_func, qsp_rec_mutex_trylock);
  383. atomic_set(&qemu_cond_wait_func, qsp_cond_wait);
  384. atomic_set(&qemu_cond_timedwait_func, qsp_cond_timedwait);
  385. }
  386. void qsp_disable(void)
  387. {
  388. atomic_set(&qemu_mutex_lock_func, qemu_mutex_lock_impl);
  389. atomic_set(&qemu_mutex_trylock_func, qemu_mutex_trylock_impl);
  390. atomic_set(&qemu_bql_mutex_lock_func, qemu_mutex_lock_impl);
  391. atomic_set(&qemu_rec_mutex_lock_func, qemu_rec_mutex_lock_impl);
  392. atomic_set(&qemu_rec_mutex_trylock_func, qemu_rec_mutex_trylock_impl);
  393. atomic_set(&qemu_cond_wait_func, qemu_cond_wait_impl);
  394. atomic_set(&qemu_cond_timedwait_func, qemu_cond_timedwait_impl);
  395. }
  396. static gint qsp_tree_cmp(gconstpointer ap, gconstpointer bp, gpointer up)
  397. {
  398. const QSPEntry *a = ap;
  399. const QSPEntry *b = bp;
  400. enum QSPSortBy sort_by = *(enum QSPSortBy *)up;
  401. const QSPCallSite *ca;
  402. const QSPCallSite *cb;
  403. switch (sort_by) {
  404. case QSP_SORT_BY_TOTAL_WAIT_TIME:
  405. if (a->ns > b->ns) {
  406. return -1;
  407. } else if (a->ns < b->ns) {
  408. return 1;
  409. }
  410. break;
  411. case QSP_SORT_BY_AVG_WAIT_TIME:
  412. {
  413. double avg_a = a->n_acqs ? a->ns / a->n_acqs : 0;
  414. double avg_b = b->n_acqs ? b->ns / b->n_acqs : 0;
  415. if (avg_a > avg_b) {
  416. return -1;
  417. } else if (avg_a < avg_b) {
  418. return 1;
  419. }
  420. break;
  421. }
  422. default:
  423. g_assert_not_reached();
  424. }
  425. ca = a->callsite;
  426. cb = b->callsite;
  427. /* Break the tie with the object's address */
  428. if (ca->obj < cb->obj) {
  429. return -1;
  430. } else if (ca->obj > cb->obj) {
  431. return 1;
  432. } else {
  433. int cmp;
  434. /* same obj. Break the tie with the callsite's file */
  435. cmp = strcmp(ca->file, cb->file);
  436. if (cmp) {
  437. return cmp;
  438. }
  439. /* same callsite file. Break the tie with the callsite's line */
  440. g_assert(ca->line != cb->line);
  441. if (ca->line < cb->line) {
  442. return -1;
  443. } else if (ca->line > cb->line) {
  444. return 1;
  445. } else {
  446. /* break the tie with the callsite's type */
  447. return cb->type - ca->type;
  448. }
  449. }
  450. }
  451. static void qsp_sort(void *p, uint32_t h, void *userp)
  452. {
  453. QSPEntry *e = p;
  454. GTree *tree = userp;
  455. g_tree_insert(tree, e, NULL);
  456. }
/*
 * qht_iter callback: fold the per-thread entry @p into the thread-agnostic
 * aggregation table @up, summing ns and n_acqs across threads.
 */
static void qsp_aggregate(void *p, uint32_t h, void *up)
{
    struct qht *ht = up;
    const QSPEntry *e = p;
    QSPEntry *agg;
    uint32_t hash;

    /* hash without the thread seed so all threads' entries collapse */
    hash = qsp_entry_no_thread_hash(e);
    agg = qsp_entry_find(ht, e, hash);
    /*
     * The entry is in the global hash table; read from it atomically (as in
     * "read once").
     */
    agg->ns += atomic_read_u64(&e->ns);
    agg->n_acqs += atomic_read_u64(&e->n_acqs);
}
/*
 * qht_iter callback over the snapshot: subtract the snapshot entry @p from
 * the matching entry in the current aggregation table @htp, removing
 * entries whose counters drop to zero.
 */
static void qsp_iter_diff(void *p, uint32_t hash, void *htp)
{
    struct qht *ht = htp;
    QSPEntry *old = p;
    QSPEntry *new;

    new = qht_lookup(ht, old, hash);
    /* entries are never deleted, so we must have this one */
    g_assert(new != NULL);
    /* our reading of the stats happened after the snapshot was taken */
    g_assert(new->n_acqs >= old->n_acqs);
    g_assert(new->ns >= old->ns);

    new->n_acqs -= old->n_acqs;
    new->ns -= old->ns;

    /* No point in reporting an empty entry */
    if (new->n_acqs == 0 && new->ns == 0) {
        bool removed = qht_remove(ht, new, hash);

        g_assert(removed);
        g_free(new);
    }
}
/* subtract the snapshot counts in @orig from @new (see qsp_iter_diff) */
static void qsp_diff(struct qht *orig, struct qht *new)
{
    qht_iter(orig, qsp_iter_diff, new);
}
/*
 * qht_iter callback: merge entries that differ only in the object they
 * operate on into a single entry in @htp, accumulating counters and
 * counting the number of distinct objects in n_objs.
 */
static void qsp_iter_callsite_coalesce(void *p, uint32_t h, void *htp)
{
    struct qht *ht = htp;
    QSPEntry *old = p;
    QSPEntry *e;
    uint32_t hash;

    /* hash ignoring both thread and object */
    hash = qsp_entry_no_thread_obj_hash(old);
    e = qht_lookup(ht, old, hash);
    if (e == NULL) {
        e = qsp_entry_create(ht, old, hash);
        e->n_objs = 1;
    } else if (e->callsite->obj != old->callsite->obj) {
        /* same (file, line, type) but a different object */
        e->n_objs++;
    }
    e->ns += old->ns;
    e->n_acqs += old->n_acqs;
}
/* qht_iter callback: free each entry; used before destroying a table */
static void qsp_ht_delete(void *p, uint32_t h, void *htp)
{
    g_free(p);
}
/*
 * Build a sorted GTree of aggregated entries for reporting.
 * @callsite_coalesce: merge entries that differ only in the object operated on.
 * The tree owns the entries (freed via the tree's key-destroy function).
 */
static void qsp_mktree(GTree *tree, bool callsite_coalesce)
{
    QSPSnapshot *snap;
    struct qht ht, coalesce_ht;
    struct qht *htp;

    /*
     * First, see if there's a prior snapshot, so that we read the global hash
     * table _after_ the snapshot has been created, which guarantees that
     * the entries we'll read will be a superset of the snapshot's entries.
     *
     * We must remain in an RCU read-side critical section until we're done
     * with the snapshot.
     */
    rcu_read_lock();
    snap = atomic_rcu_read(&qsp_snapshot);

    /* Aggregate all results from the global hash table into a local one */
    qht_init(&ht, qsp_entry_no_thread_cmp, QSP_INITIAL_SIZE,
             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
    qht_iter(&qsp_ht, qsp_aggregate, &ht);

    /* compute the difference wrt the snapshot, if any */
    if (snap) {
        qsp_diff(&snap->ht, &ht);
    }
    /* done with the snapshot; RCU can reclaim it */
    rcu_read_unlock();

    htp = &ht;
    if (callsite_coalesce) {
        qht_init(&coalesce_ht, qsp_entry_no_thread_obj_cmp, QSP_INITIAL_SIZE,
                 QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
        qht_iter(&ht, qsp_iter_callsite_coalesce, &coalesce_ht);

        /* free the previous hash table, and point htp to coalesce_ht */
        qht_iter(&ht, qsp_ht_delete, NULL);
        qht_destroy(&ht);
        htp = &coalesce_ht;
    }

    /* sort the hash table elements by using a tree */
    qht_iter(htp, qsp_sort, tree);

    /* free the hash table, but keep the elements (those are in the tree now) */
    qht_destroy(htp);
}
  557. /* free string with g_free */
  558. static char *qsp_at(const QSPCallSite *callsite)
  559. {
  560. GString *s = g_string_new(NULL);
  561. const char *shortened;
  562. /* remove the absolute path to qemu */
  563. if (unlikely(strlen(callsite->file) < qsp_qemu_path_len)) {
  564. shortened = callsite->file;
  565. } else {
  566. shortened = callsite->file + qsp_qemu_path_len;
  567. }
  568. g_string_append_printf(s, "%s:%u", shortened, callsite->line);
  569. return g_string_free(s, FALSE);
  570. }
/* one row of the final report; see qsp_tree_report() for how it is filled */
struct QSPReportEntry {
    const void *obj;          /* object address, printed when n_objs <= 1 */
    char *callsite_at;        /* "file:line" string from qsp_at(); g_free'd */
    const char *typename;     /* from qsp_typenames[] */
    double time_s;            /* total wait time, in seconds */
    double ns_avg;            /* average wait per acquisition, in ns */
    uint64_t n_acqs;          /* number of acquisitions */
    unsigned int n_objs;      /* number of coalesced objects */
};
typedef struct QSPReportEntry QSPReportEntry;

/* fixed-capacity collection of report rows */
struct QSPReport {
    QSPReportEntry *entries;
    size_t n_entries;         /* rows filled so far */
    size_t max_n_entries;     /* capacity; traversal stops when reached */
};
typedef struct QSPReport QSPReport;
  587. static gboolean qsp_tree_report(gpointer key, gpointer value, gpointer udata)
  588. {
  589. const QSPEntry *e = key;
  590. QSPReport *report = udata;
  591. QSPReportEntry *entry;
  592. if (report->n_entries == report->max_n_entries) {
  593. return TRUE;
  594. }
  595. entry = &report->entries[report->n_entries];
  596. report->n_entries++;
  597. entry->obj = e->callsite->obj;
  598. entry->n_objs = e->n_objs;
  599. entry->callsite_at = qsp_at(e->callsite);
  600. entry->typename = qsp_typenames[e->callsite->type];
  601. entry->time_s = e->ns * 1e-9;
  602. entry->n_acqs = e->n_acqs;
  603. entry->ns_avg = e->n_acqs ? e->ns / e->n_acqs : 0;
  604. return FALSE;
  605. }
/*
 * Print the report as an aligned table. The "Call site" column is sized to
 * the longest callsite string so the numeric columns line up.
 */
static void pr_report(const QSPReport *rep)
{
    char *dashes;
    size_t max_len = 0;
    int callsite_len = 0;
    int callsite_rspace;
    int n_dashes;
    size_t i;

    /* find out the maximum length of all 'callsite' fields */
    for (i = 0; i < rep->n_entries; i++) {
        const QSPReportEntry *e = &rep->entries[i];
        size_t len = strlen(e->callsite_at);

        if (len > max_len) {
            max_len = len;
        }
    }

    callsite_len = MAX(max_len, strlen("Call site"));
    /* white space to leave to the right of "Call site" */
    callsite_rspace = callsite_len - strlen("Call site");

    qemu_printf("Type Object Call site%*s Wait Time (s) "
                " Count Average (us)\n", callsite_rspace, "");

    /* build a horizontal rule with dashes */
    n_dashes = 79 + callsite_rspace;
    dashes = g_malloc(n_dashes + 1);
    memset(dashes, '-', n_dashes);
    dashes[n_dashes] = '\0';

    qemu_printf("%s\n", dashes);

    for (i = 0; i < rep->n_entries; i++) {
        const QSPReportEntry *e = &rep->entries[i];
        GString *s = g_string_new(NULL);

        g_string_append_printf(s, "%-9s ", e->typename);
        /* coalesced rows show the object count instead of an address */
        if (e->n_objs > 1) {
            g_string_append_printf(s, "[%12u]", e->n_objs);
        } else {
            g_string_append_printf(s, "%14p", e->obj);
        }
        g_string_append_printf(s, " %s%*s %13.5f %12" PRIu64 " %12.2f\n",
                               e->callsite_at,
                               callsite_len - (int)strlen(e->callsite_at), "",
                               e->time_s, e->n_acqs, e->ns_avg * 1e-3);
        qemu_printf("%s", s->str);
        g_string_free(s, TRUE);
    }

    qemu_printf("%s\n", dashes);
    g_free(dashes);
}
  652. static void report_destroy(QSPReport *rep)
  653. {
  654. size_t i;
  655. for (i = 0; i < rep->n_entries; i++) {
  656. QSPReportEntry *e = &rep->entries[i];
  657. g_free(e->callsite_at);
  658. }
  659. g_free(rep->entries);
  660. }
/*
 * Generate and print a profiling report of at most @max rows, sorted by
 * @sort_by, optionally coalescing call sites that differ only in the
 * object operated on.
 */
void qsp_report(size_t max, enum QSPSortBy sort_by,
                bool callsite_coalesce)
{
    /* g_free as key-destroy: the tree owns the entries qsp_mktree inserts */
    GTree *tree = g_tree_new_full(qsp_tree_cmp, &sort_by, g_free, NULL);
    QSPReport rep;

    qsp_init();

    rep.entries = g_new0(QSPReportEntry, max);
    rep.n_entries = 0;
    rep.max_n_entries = max;

    qsp_mktree(tree, callsite_coalesce);
    g_tree_foreach(tree, qsp_tree_report, &rep);
    g_tree_destroy(tree);

    pr_report(&rep);
    report_destroy(&rep);
}
/* RCU callback: free a retired snapshot and all of its entries */
static void qsp_snapshot_destroy(QSPSnapshot *snap)
{
    qht_iter(&snap->ht, qsp_ht_delete, NULL);
    qht_destroy(&snap->ht);
    g_free(snap);
}
/*
 * Reset the profiler's counters as seen by subsequent reports, by
 * installing a fresh snapshot of the current aggregated state that
 * qsp_mktree() will subtract. The previous snapshot, if any, is reclaimed
 * via RCU once concurrent readers are done with it.
 */
void qsp_reset(void)
{
    QSPSnapshot *new = g_new(QSPSnapshot, 1);
    QSPSnapshot *old;

    qsp_init();

    qht_init(&new->ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);

    /* take a snapshot of the current state */
    qht_iter(&qsp_ht, qsp_aggregate, &new->ht);

    /* replace the previous snapshot, if any */
    old = atomic_xchg(&qsp_snapshot, new);
    if (old) {
        call_rcu(old, qsp_snapshot_destroy, rcu);
    }
}