2
0

howvec.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. /*
  2. * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
  3. *
  4. * How vectorised is this code?
  5. *
  6. * Attempt to measure the amount of vectorisation that has been done
  7. * on some code by counting classes of instruction.
  8. *
  9. * License: GNU GPL, version 2 or later.
  10. * See the COPYING file in the top-level directory.
  11. */
  12. #include <inttypes.h>
  13. #include <assert.h>
  14. #include <stdlib.h>
  15. #include <inttypes.h>
  16. #include <string.h>
  17. #include <unistd.h>
  18. #include <stdio.h>
  19. #include <glib.h>
  20. #include <qemu-plugin.h>
/* Exported symbol carrying the QEMU_PLUGIN_VERSION this plugin was built with. */
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

/* Element count of a real array; not valid for pointers or array parameters. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  23. typedef enum {
  24. COUNT_CLASS,
  25. COUNT_INDIVIDUAL,
  26. COUNT_NONE
  27. } CountType;
/* Maximum number of individual instruction lines printed in the report. */
static int limit = 50;
/* "inline" option: count with inline per-vCPU adds instead of callbacks. */
static bool do_inline;
/* "verbose" option: also report classes that recorded zero hits. */
static bool verbose;
/* Held around lookups/insertions in the insns hash table. */
static GMutex lock;
/* Individually counted instructions: opcode (as pointer key) -> InsnExecCount. */
static GHashTable *insns;
/*
 * One row of an instruction class table: an opcode belongs to the class
 * when (opcode & mask) == pattern.
 */
typedef struct {
    /* Human readable class name printed in the report. */
    const char *class;
    /* Short name matched against the value of the "count=" option. */
    const char *opt;
    /* Opcode bits that take part in the comparison. */
    uint32_t mask;
    /* Value the masked opcode bits must equal. */
    uint32_t pattern;
    /* Accounting mode for instructions matching this row. */
    CountType what;
    /* Per-class hit counter (scoreboard backed). */
    qemu_plugin_u64 count;
} InsnClassExecCount;
/* Record for one individually counted opcode, stored in the insns table. */
typedef struct {
    /* Disassembly text from qemu_plugin_insn_disas(); released with g_free(). */
    char *insn;
    /* Leading bytes of the instruction (at most 32 bits), also the hash key. */
    uint32_t opcode;
    /* Per-instruction hit counter (scoreboard backed). */
    qemu_plugin_u64 count;
    /* Class table row that matched this opcode. */
    InsnClassExecCount *class;
} InsnExecCount;
  47. /*
  48. * Matchers for classes of instructions, order is important.
  49. *
  50. * Your most precise match must be before looser matches. If no match
  51. * is found in the table we can create an individual entry.
  52. *
  53. * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
  54. */
  55. static InsnClassExecCount aarch64_insn_classes[] = {
  56. /* "Reserved"" */
  57. { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE},
  58. { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS},
  59. { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS},
  60. /* Data Processing Immediate */
  61. { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS},
  62. { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS},
  63. { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS},
  64. { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS},
  65. { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS},
  66. { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS},
  67. { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS},
  68. { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS},
  69. /* Branches */
  70. { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS},
  71. { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS},
  72. { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE},
  73. { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS},
  74. { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS},
  75. { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS},
  76. { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS},
  77. { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS},
  78. { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS},
  79. { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS},
  80. { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS},
  81. { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS},
  82. { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
  83. /* Loads and Stores */
  84. { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
  85. { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS},
  86. { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
  87. { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS},
  88. { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS},
  89. { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS},
  90. { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS},
  91. { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS},
  92. { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS},
  93. { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS},
  94. { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
  95. { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
  96. { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
  97. { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS},
  98. { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS},
  99. /* Data Processing Register */
  100. { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS},
  101. /* Scalar FP */
  102. { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS},
  103. /* Unclassified */
  104. { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS},
  105. };
/*
 * Class table used for the "sparc" target. Rows are tried top to bottom
 * and the first match wins, so the narrower masks come before the rows
 * that only test the top two opcode bits (mask 0xc0000000).
 */
static InsnClassExecCount sparc32_insn_classes[] = {
    { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS},
    { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS},
    { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS},
    { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS},
    { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS},
    { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS},
    { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS},
    /* Unclassified: zero mask matches anything left; counted per opcode */
    { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
};
  117. static InsnClassExecCount sparc64_insn_classes[] = {
  118. { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS},
  119. { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS},
  120. { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS},
  121. { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS},
  122. /* Unclassified */
  123. { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
  124. };
/*
 * Default matcher for currently unclassified architectures: the single
 * zero-mask row matches every opcode and counts each one individually.
 */
static InsnClassExecCount default_insn_classes[] = {
    { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
};
/* Associates a guest target name with the class table to use for it. */
typedef struct {
    /* Target name compared with info->target_name; NULL matches any target. */
    const char *qemu_target;
    /* First row of the class table. */
    InsnClassExecCount *table;
    /* Number of rows in the table. */
    int table_sz;
} ClassSelector;
/*
 * All known class tables. Selection stops at the first entry whose name
 * equals the target name or is NULL, so the NULL fallback must stay last.
 */
static ClassSelector class_tables[] = {
    { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
    { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
    { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
    { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) },
};
/* Class table selected for the current guest in qemu_plugin_install(). */
static InsnClassExecCount *class_table;
/* Number of rows in class_table. */
static int class_table_sz;
  142. static gint cmp_exec_count(gconstpointer a, gconstpointer b)
  143. {
  144. InsnExecCount *ea = (InsnExecCount *) a;
  145. InsnExecCount *eb = (InsnExecCount *) b;
  146. uint64_t count_a = qemu_plugin_u64_sum(ea->count);
  147. uint64_t count_b = qemu_plugin_u64_sum(eb->count);
  148. return count_a > count_b ? -1 : 1;
  149. }
  150. static void free_record(gpointer data)
  151. {
  152. InsnExecCount *rec = (InsnExecCount *) data;
  153. qemu_plugin_scoreboard_free(rec->count.score);
  154. g_free(rec->insn);
  155. g_free(rec);
  156. }
  157. static void plugin_exit(qemu_plugin_id_t id, void *p)
  158. {
  159. g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
  160. int i;
  161. uint64_t total_count;
  162. GList *counts;
  163. InsnClassExecCount *class = NULL;
  164. for (i = 0; i < class_table_sz; i++) {
  165. class = &class_table[i];
  166. switch (class->what) {
  167. case COUNT_CLASS:
  168. total_count = qemu_plugin_u64_sum(class->count);
  169. if (total_count || verbose) {
  170. g_string_append_printf(report,
  171. "Class: %-24s\t(%" PRId64 " hits)\n",
  172. class->class,
  173. total_count);
  174. }
  175. break;
  176. case COUNT_INDIVIDUAL:
  177. g_string_append_printf(report, "Class: %-24s\tcounted individually\n",
  178. class->class);
  179. break;
  180. case COUNT_NONE:
  181. g_string_append_printf(report, "Class: %-24s\tnot counted\n",
  182. class->class);
  183. break;
  184. default:
  185. break;
  186. }
  187. }
  188. counts = g_hash_table_get_values(insns);
  189. if (counts && g_list_next(counts)) {
  190. g_string_append_printf(report, "Individual Instructions:\n");
  191. counts = g_list_sort(counts, cmp_exec_count);
  192. for (i = 0; i < limit && g_list_next(counts);
  193. i++, counts = g_list_next(counts)) {
  194. InsnExecCount *rec = (InsnExecCount *) counts->data;
  195. g_string_append_printf(report,
  196. "Instr: %-24s\t(%" PRId64 " hits)"
  197. "\t(op=0x%08x/%s)\n",
  198. rec->insn,
  199. qemu_plugin_u64_sum(rec->count),
  200. rec->opcode,
  201. rec->class ?
  202. rec->class->class : "un-categorised");
  203. }
  204. g_list_free(counts);
  205. }
  206. g_hash_table_destroy(insns);
  207. for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
  208. for (int j = 0; j < class_tables[i].table_sz; ++j) {
  209. qemu_plugin_scoreboard_free(class_tables[i].table[j].count.score);
  210. }
  211. }
  212. qemu_plugin_outs(report->str);
  213. }
  214. static void plugin_init(void)
  215. {
  216. insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record);
  217. }
  218. static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
  219. {
  220. struct qemu_plugin_scoreboard *score = udata;
  221. qemu_plugin_u64_add(qemu_plugin_scoreboard_u64(score), cpu_index, 1);
  222. }
  223. static struct qemu_plugin_scoreboard *find_counter(
  224. struct qemu_plugin_insn *insn)
  225. {
  226. int i;
  227. uint64_t *cnt = NULL;
  228. uint32_t opcode = 0;
  229. /* if opcode is greater than 32 bits, we should refactor insn hash table. */
  230. G_STATIC_ASSERT(sizeof(opcode) == sizeof(uint32_t));
  231. InsnClassExecCount *class = NULL;
  232. /*
  233. * We only match the first 32 bits of the instruction which is
  234. * fine for most RISCs but a bit limiting for CISC architectures.
  235. * They would probably benefit from a more tailored plugin.
  236. * However we can fall back to individual instruction counting.
  237. */
  238. qemu_plugin_insn_data(insn, &opcode, sizeof(opcode));
  239. for (i = 0; !cnt && i < class_table_sz; i++) {
  240. class = &class_table[i];
  241. uint32_t masked_bits = opcode & class->mask;
  242. if (masked_bits == class->pattern) {
  243. break;
  244. }
  245. }
  246. g_assert(class);
  247. switch (class->what) {
  248. case COUNT_NONE:
  249. return NULL;
  250. case COUNT_CLASS:
  251. return class->count.score;
  252. case COUNT_INDIVIDUAL:
  253. {
  254. InsnExecCount *icount;
  255. g_mutex_lock(&lock);
  256. icount = (InsnExecCount *) g_hash_table_lookup(insns,
  257. (gpointer)(intptr_t) opcode);
  258. if (!icount) {
  259. icount = g_new0(InsnExecCount, 1);
  260. icount->opcode = opcode;
  261. icount->insn = qemu_plugin_insn_disas(insn);
  262. icount->class = class;
  263. struct qemu_plugin_scoreboard *score =
  264. qemu_plugin_scoreboard_new(sizeof(uint64_t));
  265. icount->count = qemu_plugin_scoreboard_u64(score);
  266. g_hash_table_insert(insns, (gpointer)(intptr_t) opcode, icount);
  267. }
  268. g_mutex_unlock(&lock);
  269. return icount->count.score;
  270. }
  271. default:
  272. g_assert_not_reached();
  273. }
  274. return NULL;
  275. }
  276. static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
  277. {
  278. size_t n = qemu_plugin_tb_n_insns(tb);
  279. size_t i;
  280. for (i = 0; i < n; i++) {
  281. struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
  282. struct qemu_plugin_scoreboard *cnt = find_counter(insn);
  283. if (cnt) {
  284. if (do_inline) {
  285. qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
  286. insn, QEMU_PLUGIN_INLINE_ADD_U64,
  287. qemu_plugin_scoreboard_u64(cnt), 1);
  288. } else {
  289. qemu_plugin_register_vcpu_insn_exec_cb(
  290. insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt);
  291. }
  292. }
  293. }
  294. }
  295. QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
  296. const qemu_info_t *info,
  297. int argc, char **argv)
  298. {
  299. int i;
  300. for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
  301. for (int j = 0; j < class_tables[i].table_sz; ++j) {
  302. struct qemu_plugin_scoreboard *score =
  303. qemu_plugin_scoreboard_new(sizeof(uint64_t));
  304. class_tables[i].table[j].count = qemu_plugin_scoreboard_u64(score);
  305. }
  306. }
  307. /* Select a class table appropriate to the guest architecture */
  308. for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
  309. ClassSelector *entry = &class_tables[i];
  310. if (!entry->qemu_target ||
  311. strcmp(entry->qemu_target, info->target_name) == 0) {
  312. class_table = entry->table;
  313. class_table_sz = entry->table_sz;
  314. break;
  315. }
  316. }
  317. for (i = 0; i < argc; i++) {
  318. char *p = argv[i];
  319. g_auto(GStrv) tokens = g_strsplit(p, "=", -1);
  320. if (g_strcmp0(tokens[0], "inline") == 0) {
  321. if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) {
  322. fprintf(stderr, "boolean argument parsing failed: %s\n", p);
  323. return -1;
  324. }
  325. } else if (g_strcmp0(tokens[0], "verbose") == 0) {
  326. if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) {
  327. fprintf(stderr, "boolean argument parsing failed: %s\n", p);
  328. return -1;
  329. }
  330. } else if (g_strcmp0(tokens[0], "count") == 0) {
  331. char *value = tokens[1];
  332. int j;
  333. CountType type = COUNT_INDIVIDUAL;
  334. if (*value == '!') {
  335. type = COUNT_NONE;
  336. value++;
  337. }
  338. for (j = 0; j < class_table_sz; j++) {
  339. if (strcmp(value, class_table[j].opt) == 0) {
  340. class_table[j].what = type;
  341. break;
  342. }
  343. }
  344. } else {
  345. fprintf(stderr, "option parsing failed: %s\n", p);
  346. return -1;
  347. }
  348. }
  349. plugin_init();
  350. qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
  351. qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
  352. return 0;
  353. }