123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397 |
- /*
- * qdist.c - QEMU helpers for handling frequency distributions of data.
- *
- * Copyright (C) 2016, Emilio G. Cota <cota@braap.org>
- *
- * License: GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
- #include "qemu/osdep.h"
- #include "qemu/qdist.h"
- #include <math.h>
- #ifndef NAN
- #define NAN (0.0 / 0.0)
- #endif
- #define QDIST_EMPTY_STR "(empty)"
- void qdist_init(struct qdist *dist)
- {
- dist->entries = g_new(struct qdist_entry, 1);
- dist->size = 1;
- dist->n = 0;
- }
- void qdist_destroy(struct qdist *dist)
- {
- g_free(dist->entries);
- }
- static inline int qdist_cmp_double(double a, double b)
- {
- if (a > b) {
- return 1;
- } else if (a < b) {
- return -1;
- }
- return 0;
- }
- static int qdist_cmp(const void *ap, const void *bp)
- {
- const struct qdist_entry *a = ap;
- const struct qdist_entry *b = bp;
- return qdist_cmp_double(a->x, b->x);
- }
- void qdist_add(struct qdist *dist, double x, long count)
- {
- struct qdist_entry *entry = NULL;
- if (dist->n) {
- struct qdist_entry e;
- e.x = x;
- entry = bsearch(&e, dist->entries, dist->n, sizeof(e), qdist_cmp);
- }
- if (entry) {
- entry->count += count;
- return;
- }
- if (unlikely(dist->n == dist->size)) {
- dist->size *= 2;
- dist->entries = g_renew(struct qdist_entry, dist->entries, dist->size);
- }
- dist->n++;
- entry = &dist->entries[dist->n - 1];
- entry->x = x;
- entry->count = count;
- qsort(dist->entries, dist->n, sizeof(*entry), qdist_cmp);
- }
- void qdist_inc(struct qdist *dist, double x)
- {
- qdist_add(dist, x, 1);
- }
- /*
- * Unicode for block elements. See:
- * https://en.wikipedia.org/wiki/Block_Elements
- */
- static const gunichar qdist_blocks[] = {
- 0x2581,
- 0x2582,
- 0x2583,
- 0x2584,
- 0x2585,
- 0x2586,
- 0x2587,
- 0x2588
- };
- #define QDIST_NR_BLOCK_CODES ARRAY_SIZE(qdist_blocks)
- /*
- * Print a distribution into a string.
- *
- * This function assumes that appropriate binning has been done on the input;
- * see qdist_bin__internal() and qdist_pr_plain().
- *
- * Callers must free the returned string with g_free().
- */
- static char *qdist_pr_internal(const struct qdist *dist)
- {
- double min, max;
- GString *s = g_string_new("");
- size_t i;
- /* if only one entry, its printout will be either full or empty */
- if (dist->n == 1) {
- if (dist->entries[0].count) {
- g_string_append_unichar(s, qdist_blocks[QDIST_NR_BLOCK_CODES - 1]);
- } else {
- g_string_append_c(s, ' ');
- }
- goto out;
- }
- /* get min and max counts */
- min = dist->entries[0].count;
- max = min;
- for (i = 0; i < dist->n; i++) {
- struct qdist_entry *e = &dist->entries[i];
- if (e->count < min) {
- min = e->count;
- }
- if (e->count > max) {
- max = e->count;
- }
- }
- for (i = 0; i < dist->n; i++) {
- struct qdist_entry *e = &dist->entries[i];
- int index;
- /* make an exception with 0; instead of using block[0], print a space */
- if (e->count) {
- /* divide first to avoid loss of precision when e->count == max */
- index = (e->count - min) / (max - min) * (QDIST_NR_BLOCK_CODES - 1);
- g_string_append_unichar(s, qdist_blocks[index]);
- } else {
- g_string_append_c(s, ' ');
- }
- }
- out:
- return g_string_free(s, FALSE);
- }
- /*
- * Bin the distribution in @from into @n bins of consecutive, non-overlapping
- * intervals, copying the result to @to.
- *
- * This function is internal to qdist: only this file and test code should
- * ever call it.
- *
- * Note: calling this function on an already-binned qdist is a bug.
- *
- * If @n == 0 or @from->n == 1, use @from->n.
- */
- void qdist_bin__internal(struct qdist *to, const struct qdist *from, size_t n)
- {
- double xmin, xmax;
- double step;
- size_t i, j;
- qdist_init(to);
- if (from->n == 0) {
- return;
- }
- if (n == 0 || from->n == 1) {
- n = from->n;
- }
- /* set equally-sized bins between @from's left and right */
- xmin = qdist_xmin(from);
- xmax = qdist_xmax(from);
- step = (xmax - xmin) / n;
- if (n == from->n) {
- /* if @from's entries are equally spaced, no need to re-bin */
- for (i = 0; i < from->n; i++) {
- if (from->entries[i].x != xmin + i * step) {
- goto rebin;
- }
- }
- /* they're equally spaced, so copy the dist and bail out */
- to->entries = g_renew(struct qdist_entry, to->entries, n);
- to->n = from->n;
- memcpy(to->entries, from->entries, sizeof(*to->entries) * to->n);
- return;
- }
- rebin:
- j = 0;
- for (i = 0; i < n; i++) {
- double x;
- double left, right;
- left = xmin + i * step;
- right = xmin + (i + 1) * step;
- /* Add x, even if it might not get any counts later */
- x = left;
- qdist_add(to, x, 0);
- /*
- * To avoid double-counting we capture [left, right) ranges, except for
- * the righmost bin, which captures a [left, right] range.
- */
- while (j < from->n && (from->entries[j].x < right || i == n - 1)) {
- struct qdist_entry *o = &from->entries[j];
- qdist_add(to, x, o->count);
- j++;
- }
- }
- }
- /*
- * Print @dist into a string, after re-binning it into @n bins of consecutive,
- * non-overlapping intervals.
- *
- * If @n == 0, use @orig->n.
- *
- * Callers must free the returned string with g_free().
- */
- char *qdist_pr_plain(const struct qdist *dist, size_t n)
- {
- struct qdist binned;
- char *ret;
- if (dist->n == 0) {
- return g_strdup(QDIST_EMPTY_STR);
- }
- qdist_bin__internal(&binned, dist, n);
- ret = qdist_pr_internal(&binned);
- qdist_destroy(&binned);
- return ret;
- }
- static char *qdist_pr_label(const struct qdist *dist, size_t n_bins,
- uint32_t opt, bool is_left)
- {
- const char *percent;
- const char *lparen;
- const char *rparen;
- GString *s;
- double x1, x2, step;
- double x;
- double n;
- int dec;
- s = g_string_new("");
- if (!(opt & QDIST_PR_LABELS)) {
- goto out;
- }
- dec = opt & QDIST_PR_NODECIMAL ? 0 : 1;
- percent = opt & QDIST_PR_PERCENT ? "%" : "";
- n = n_bins ? n_bins : dist->n;
- x = is_left ? qdist_xmin(dist) : qdist_xmax(dist);
- step = (qdist_xmax(dist) - qdist_xmin(dist)) / n;
- if (opt & QDIST_PR_100X) {
- x *= 100.0;
- step *= 100.0;
- }
- if (opt & QDIST_PR_NOBINRANGE) {
- lparen = rparen = "";
- x1 = x;
- x2 = x; /* unnecessary, but a dumb compiler might not figure it out */
- } else {
- lparen = "[";
- rparen = is_left ? ")" : "]";
- if (is_left) {
- x1 = x;
- x2 = x + step;
- } else {
- x1 = x - step;
- x2 = x;
- }
- }
- g_string_append_printf(s, "%s%.*f", lparen, dec, x1);
- if (!(opt & QDIST_PR_NOBINRANGE)) {
- g_string_append_printf(s, ",%.*f%s", dec, x2, rparen);
- }
- g_string_append(s, percent);
- out:
- return g_string_free(s, FALSE);
- }
- /*
- * Print the distribution's histogram into a string.
- *
- * See also: qdist_pr_plain().
- *
- * Callers must free the returned string with g_free().
- */
- char *qdist_pr(const struct qdist *dist, size_t n_bins, uint32_t opt)
- {
- const char *border = opt & QDIST_PR_BORDER ? "|" : "";
- char *llabel, *rlabel;
- char *hgram;
- GString *s;
- if (dist->n == 0) {
- return g_strdup(QDIST_EMPTY_STR);
- }
- s = g_string_new("");
- llabel = qdist_pr_label(dist, n_bins, opt, true);
- rlabel = qdist_pr_label(dist, n_bins, opt, false);
- hgram = qdist_pr_plain(dist, n_bins);
- g_string_append_printf(s, "%s%s%s%s%s",
- llabel, border, hgram, border, rlabel);
- g_free(llabel);
- g_free(rlabel);
- g_free(hgram);
- return g_string_free(s, FALSE);
- }
- static inline double qdist_x(const struct qdist *dist, int index)
- {
- if (dist->n == 0) {
- return NAN;
- }
- return dist->entries[index].x;
- }
- double qdist_xmin(const struct qdist *dist)
- {
- return qdist_x(dist, 0);
- }
- double qdist_xmax(const struct qdist *dist)
- {
- return qdist_x(dist, dist->n - 1);
- }
- size_t qdist_unique_entries(const struct qdist *dist)
- {
- return dist->n;
- }
- unsigned long qdist_sample_count(const struct qdist *dist)
- {
- unsigned long count = 0;
- size_t i;
- for (i = 0; i < dist->n; i++) {
- struct qdist_entry *e = &dist->entries[i];
- count += e->count;
- }
- return count;
- }
- static double qdist_pairwise_avg(const struct qdist *dist, size_t index,
- size_t n, unsigned long count)
- {
- /* amortize the recursion by using a base case > 2 */
- if (n <= 8) {
- size_t i;
- double ret = 0;
- for (i = 0; i < n; i++) {
- struct qdist_entry *e = &dist->entries[index + i];
- ret += e->x * e->count / count;
- }
- return ret;
- } else {
- size_t n2 = n / 2;
- return qdist_pairwise_avg(dist, index, n2, count) +
- qdist_pairwise_avg(dist, index + n2, n - n2, count);
- }
- }
- double qdist_avg(const struct qdist *dist)
- {
- unsigned long count;
- count = qdist_sample_count(dist);
- if (!count) {
- return NAN;
- }
- return qdist_pairwise_avg(dist, 0, dist->n, count);
- }
|