qdist.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. /*
  2. * qdist.c - QEMU helpers for handling frequency distributions of data.
  3. *
  4. * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
  5. *
  6. * License: GNU GPL, version 2 or later.
  7. * See the COPYING file in the top-level directory.
  8. */
  9. #include "qemu/osdep.h"
  10. #include "qemu/qdist.h"
  11. #include <math.h>
  12. #ifndef NAN
  13. #define NAN (0.0 / 0.0)
  14. #endif
  15. #define QDIST_EMPTY_STR "(empty)"
  16. void qdist_init(struct qdist *dist)
  17. {
  18. dist->entries = g_new(struct qdist_entry, 1);
  19. dist->size = 1;
  20. dist->n = 0;
  21. }
  22. void qdist_destroy(struct qdist *dist)
  23. {
  24. g_free(dist->entries);
  25. }
  26. static inline int qdist_cmp_double(double a, double b)
  27. {
  28. if (a > b) {
  29. return 1;
  30. } else if (a < b) {
  31. return -1;
  32. }
  33. return 0;
  34. }
  35. static int qdist_cmp(const void *ap, const void *bp)
  36. {
  37. const struct qdist_entry *a = ap;
  38. const struct qdist_entry *b = bp;
  39. return qdist_cmp_double(a->x, b->x);
  40. }
  41. void qdist_add(struct qdist *dist, double x, long count)
  42. {
  43. struct qdist_entry *entry = NULL;
  44. if (dist->n) {
  45. struct qdist_entry e;
  46. e.x = x;
  47. entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
  48. }
  49. if (entry) {
  50. entry->count += count;
  51. return;
  52. }
  53. if (unlikely(dist->n == dist->size)) {
  54. dist->size *= 2;
  55. dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
  56. }
  57. dist->n++;
  58. entry = &dist->entries[dist->n - 1];
  59. entry->x = x;
  60. entry->count = count;
  61. qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
  62. }
  63. void qdist_inc(struct qdist *dist, double x)
  64. {
  65. qdist_add(dist, x, 1);
  66. }
  67. /*
  68. * Unicode for block elements. See:
  69. * https://en.wikipedia.org/wiki/Block_Elements
  70. */
  71. static const gunichar qdist_blocks[] = {
  72. 0x2581,
  73. 0x2582,
  74. 0x2583,
  75. 0x2584,
  76. 0x2585,
  77. 0x2586,
  78. 0x2587,
  79. 0x2588
  80. };
  81. #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
  82. /*
  83. * Print a distribution into a string.
  84. *
  85. * This function assumes that appropriate binning has been done on the input;
  86. * see qdist_bin__internal() and qdist_pr_plain().
  87. *
  88. * Callers must free the returned string with g_free().
  89. */
  90. static char *qdist_pr_internal(const struct qdist *dist)
  91. {
  92. double min, max;
  93. GString *s = g_string_new("");
  94. size_t i;
  95. /* if only one entry, its printout will be either full or empty */
  96. if (dist->n == 1) {
  97. if (dist->entries[0].count) {
  98. g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
  99. } else {
  100. g_string_append_c(s, ' ');
  101. }
  102. goto out;
  103. }
  104. /* get min and max counts */
  105. min = dist->entries[0].count;
  106. max = min;
  107. for (i = 0; i < dist->n; i++) {
  108. struct qdist_entry *e = &dist->entries[i];
  109. if (e->count < min) {
  110. min = e->count;
  111. }
  112. if (e->count > max) {
  113. max = e->count;
  114. }
  115. }
  116. for (i = 0; i < dist->n; i++) {
  117. struct qdist_entry *e = &dist->entries[i];
  118. int index;
  119. /* make an exception with 0; instead of using block[0], print a space */
  120. if (e->count) {
  121. /* divide first to avoid loss of precision when e->count == max */
  122. index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
  123. g_string_append_unichar(s, qdist_blocks[index]);
  124. } else {
  125. g_string_append_c(s, ' ');
  126. }
  127. }
  128. out:
  129. return g_string_free(s, FALSE);
  130. }
  131. /*
  132. * Bin the distribution in @from into @n bins of consecutive, non-overlapping
  133. * intervals, copying the result to @to.
  134. *
  135. * This function is internal to qdist: only this file and test code should
  136. * ever call it.
  137. *
  138. * Note: calling this function on an already-binned qdist is a bug.
  139. *
  140. * If @n == 0 or @from->n == 1, use @from->n.
  141. */
  142. void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
  143. {
  144. double xmin, xmax;
  145. double step;
  146. size_t i, j;
  147. qdist_init(to);
  148. if (from->n == 0) {
  149. return;
  150. }
  151. if (n == 0 || from->n == 1) {
  152. n = from->n;
  153. }
  154. /* set equally-sized bins between @from's left and right */
  155. xmin = qdist_xmin(from);
  156. xmax = qdist_xmax(from);
  157. step = (xmax - xmin) / n;
  158. if (n == from->n) {
  159. /* if @from's entries are equally spaced, no need to re-bin */
  160. for (i = 0; i < from->n; i++) {
  161. if (from->entries[i].x != xmin + i * step) {
  162. goto rebin;
  163. }
  164. }
  165. /* they're equally spaced, so copy the dist and bail out */
  166. to->entries = g_renew(struct qdist_entry, to->entries, n);
  167. to->n = from->n;
  168. memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
  169. return;
  170. }
  171. rebin:
  172. j = 0;
  173. for (i = 0; i < n; i++) {
  174. double x;
  175. double left, right;
  176. left = xmin + i * step;
  177. right = xmin + (i + 1) * step;
  178. /* Add x, even if it might not get any counts later */
  179. x = left;
  180. qdist_add(to, x, 0);
  181. /*
  182. * To avoid double-counting we capture [left, right) ranges, except for
  183. * the righmost bin, which captures a [left, right] range.
  184. */
  185. while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
  186. struct qdist_entry *o = &from->entries[j];
  187. qdist_add(to, x, o->count);
  188. j++;
  189. }
  190. }
  191. }
  192. /*
  193. * Print @dist into a string, after re-binning it into @n bins of consecutive,
  194. * non-overlapping intervals.
  195. *
  196. * If @n == 0, use @orig->n.
  197. *
  198. * Callers must free the returned string with g_free().
  199. */
  200. char *qdist_pr_plain(const struct qdist *dist, size_t n)
  201. {
  202. struct qdist binned;
  203. char *ret;
  204. if (dist->n == 0) {
  205. return g_strdup(QDIST_EMPTY_STR);
  206. }
  207. qdist_bin__internal(&binned, dist, n);
  208. ret = qdist_pr_internal(&binned);
  209. qdist_destroy(&binned);
  210. return ret;
  211. }
  212. static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
  213. uint32_t opt, bool is_left)
  214. {
  215. const char *percent;
  216. const char *lparen;
  217. const char *rparen;
  218. GString *s;
  219. double x1, x2, step;
  220. double x;
  221. double n;
  222. int dec;
  223. s = g_string_new("");
  224. if (!(opt & QDIST_PR_LABELS)) {
  225. goto out;
  226. }
  227. dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
  228. percent = opt & QDIST_PR_PERCENT ? "%" : "";
  229. n = n_bins ? n_bins : dist->n;
  230. x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
  231. step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
  232. if (opt & QDIST_PR_100X) {
  233. x *= 100.0;
  234. step *= 100.0;
  235. }
  236. if (opt & QDIST_PR_NOBINRANGE) {
  237. lparen = rparen = "";
  238. x1 = x;
  239. x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
  240. } else {
  241. lparen = "[";
  242. rparen = is_left ? ")" : "]";
  243. if (is_left) {
  244. x1 = x;
  245. x2 = x + step;
  246. } else {
  247. x1 = x - step;
  248. x2 = x;
  249. }
  250. }
  251. g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
  252. if (!(opt & QDIST_PR_NOBINRANGE)) {
  253. g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
  254. }
  255. g_string_append(s, percent);
  256. out:
  257. return g_string_free(s, FALSE);
  258. }
  259. /*
  260. * Print the distribution's histogram into a string.
  261. *
  262. * See also: qdist_pr_plain().
  263. *
  264. * Callers must free the returned string with g_free().
  265. */
  266. char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
  267. {
  268. const char *border = opt & QDIST_PR_BORDER ? "|" : "";
  269. char *llabel, *rlabel;
  270. char *hgram;
  271. GString *s;
  272. if (dist->n == 0) {
  273. return g_strdup(QDIST_EMPTY_STR);
  274. }
  275. s = g_string_new("");
  276. llabel = qdist_pr_label(dist, n_bins, opt, true);
  277. rlabel = qdist_pr_label(dist, n_bins, opt, false);
  278. hgram = qdist_pr_plain(dist, n_bins);
  279. g_string_append_printf(s, "%s%s%s%s%s",
  280. llabel, border, hgram, border, rlabel);
  281. g_free(llabel);
  282. g_free(rlabel);
  283. g_free(hgram);
  284. return g_string_free(s, FALSE);
  285. }
  286. static inline double qdist_x(const struct qdist *dist, int index)
  287. {
  288. if (dist->n == 0) {
  289. return NAN;
  290. }
  291. return dist->entries[index].x;
  292. }
  293. double qdist_xmin(const struct qdist *dist)
  294. {
  295. return qdist_x(dist, 0);
  296. }
  297. double qdist_xmax(const struct qdist *dist)
  298. {
  299. return qdist_x(dist, dist->n - 1);
  300. }
  301. size_t qdist_unique_entries(const struct qdist *dist)
  302. {
  303. return dist->n;
  304. }
  305. unsigned long qdist_sample_count(const struct qdist *dist)
  306. {
  307. unsigned long count = 0;
  308. size_t i;
  309. for (i = 0; i < dist->n; i++) {
  310. struct qdist_entry *e = &dist->entries[i];
  311. count += e->count;
  312. }
  313. return count;
  314. }
  315. static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
  316. size_t n, unsigned long count)
  317. {
  318. /* amortize the recursion by using a base case > 2 */
  319. if (n <= 8) {
  320. size_t i;
  321. double ret = 0;
  322. for (i = 0; i < n; i++) {
  323. struct qdist_entry *e = &dist->entries[index + i];
  324. ret += e->x * e->count / count;
  325. }
  326. return ret;
  327. } else {
  328. size_t n2 = n / 2;
  329. return qdist_pairwise_avg(dist, index, n2, count) +
  330. qdist_pairwise_avg(dist, index + n2, n - n2, count);
  331. }
  332. }
  333. double qdist_avg(const struct qdist *dist)
  334. {
  335. unsigned long count;
  336. count = qdist_sample_count(dist);
  337. if (!count) {
  338. return NAN;
  339. }
  340. return qdist_pairwise_avg(dist, 0, dist->n, count);
  341. }