2
0

cache.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853
  1. /*
  2. * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
  3. *
  4. * License: GNU GPL, version 2 or later.
  5. * See the COPYING file in the top-level directory.
  6. */
  7. #include <inttypes.h>
  8. #include <stdio.h>
  9. #include <glib.h>
  10. #include <qemu-plugin.h>
/*
 * Parse a base-10 integer from the string x with g_ascii_strtoll().
 * The end pointer is passed as NULL, so trailing garbage is not detected.
 */
#define STRTOLL(x) g_ascii_strtoll(x, NULL, 10)

QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

/* Kind of memory accesses passed when registering the memory callback. */
static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;

/*
 * Maps an instruction address to its InsnData record. Lookups and inserts in
 * vcpu_tb_trans() are done while holding hashtable_lock.
 */
static GHashTable *miss_ht;
static GMutex hashtable_lock;

/* Random number generator; only created by policy_init() for RAND. */
static GRand *rng;

/* Maximum number of rows printed per table in log_top_insns(). */
static int limit;

/* Copy of info->system_emulation taken in qemu_plugin_install(). */
static bool sys;
/* Block replacement strategies; selected with the "evict" plugin option. */
enum EvictionPolicy {
    LRU,    /* evict the block with the lowest priority value */
    FIFO,   /* evict the block that was enqueued first */
    RAND,   /* evict a randomly chosen block */
};

/* Policy in effect; qemu_plugin_install() sets LRU before parsing options. */
enum EvictionPolicy policy;
  25. /*
 * A CacheSet is a set of cache blocks. A memory block that maps to a set can be
 * put in any of the blocks inside the set. The number of blocks per set is
 * called the associativity (assoc).
  29. *
  30. * Each block contains the stored tag and a valid bit. Since this is not
  31. * a functional simulator, the data itself is not stored. We only identify
  32. * whether a block is in the cache or not by searching for its tag.
  33. *
 * In order to search for memory data in the cache, the set identifier and tag
 * are extracted from the address and the set is probed to see whether a tag
 * match occurs.
  37. *
  38. * An address is logically divided into three portions: The block offset,
  39. * the set number, and the tag.
  40. *
  41. * The set number is used to identify the set in which the block may exist.
  42. * The tag is compared against all the tags of a set to search for a match. If a
  43. * match is found, then the access is a hit.
  44. *
 * The CacheSet also contains bookkeeping information about eviction details.
  46. */
/* One cache block (way): only the tag and a valid flag are modelled. */
typedef struct {
    uint64_t tag;   /* value produced by extract_tag() for the cached address */
    bool valid;     /* false until the block is first filled by access_cache() */
} CacheBlock;
/* One cache set: its blocks plus the eviction bookkeeping for the set. */
typedef struct {
    CacheBlock *blocks;         /* array of `assoc` blocks */
    uint64_t *lru_priorities;   /* per-block priority; allocated by the LRU init hook */
    uint64_t lru_gen_counter;   /* incremented on every LRU update of this set */
    GQueue *fifo_queue;         /* block indices in fill order; allocated by the FIFO init hook */
} CacheSet;
/* A single simulated cache together with its geometry and hit/miss counters. */
typedef struct {
    CacheSet *sets;         /* array of num_sets sets */
    int num_sets;           /* cachesize / (blksize * assoc) */
    int cachesize;          /* total size as passed to cache_init() */
    int assoc;              /* number of blocks per set */
    int blksize_shift;      /* pow_of_two(blksize): number of block-offset bits */
    uint64_t set_mask;      /* (num_sets - 1) << blksize_shift */
    uint64_t tag_mask;      /* ~(set_mask | (blksize - 1)) */
    uint64_t accesses;      /* incremented by the vCPU callbacks on every access */
    uint64_t misses;        /* incremented by the vCPU callbacks on every miss */
} Cache;
/* Per-instruction record stored in miss_ht and passed to callbacks as userdata. */
typedef struct {
    char *disas_str;        /* from qemu_plugin_insn_disas(); released in insn_free() */
    const char *symbol;     /* from qemu_plugin_insn_symbol(); may be NULL, not freed here */
    uint64_t addr;          /* instruction address; also used as the miss_ht key */
    uint64_t l1_dmisses;    /* L1 data cache misses attributed to this instruction */
    uint64_t l1_imisses;    /* L1 instruction cache misses attributed to this instruction */
    uint64_t l2_misses;     /* L2 misses (data or fetch) attributed to this instruction */
} InsnData;
/*
 * Eviction-policy hooks, assigned in policy_init(). A hook left NULL is simply
 * skipped by its callers (access_cache(), cache_init(), cache_free()).
 */
void (*update_hit)(Cache *cache, int set, int blk);     /* called on a cache hit */
void (*update_miss)(Cache *cache, int set, int blk);    /* called on a miss, before the fill */
void (*metadata_init)(Cache *cache);                    /* allocates per-set policy metadata */
void (*metadata_destroy)(Cache *cache);                 /* releases per-set policy metadata */
/* Number of simulated cores; every cache array below has `cores` entries. */
static int cores;
static Cache **l1_dcaches, **l1_icaches;

/* Whether an L2 cache is simulated; l2_ucaches is NULL when it is not. */
static bool use_l2;
static Cache **l2_ucaches;

/* One lock per core for each cache level, indexed like the cache arrays. */
static GMutex *l1_dcache_locks;
static GMutex *l1_icache_locks;
static GMutex *l2_ucache_locks;

/* Totals over all cores, accumulated by sum_stats(). */
static uint64_t l1_dmem_accesses;
static uint64_t l1_imem_accesses;
static uint64_t l1_imisses;
static uint64_t l1_dmisses;
static uint64_t l2_mem_accesses;
static uint64_t l2_misses;
  93. static int pow_of_two(int num)
  94. {
  95. g_assert((num & (num - 1)) == 0);
  96. int ret = 0;
  97. while (num /= 2) {
  98. ret++;
  99. }
  100. return ret;
  101. }
  102. /*
 * LRU eviction policy: For each set, a generation counter is maintained
  104. * alongside a priority array.
  105. *
  106. * On each set access, the generation counter is incremented.
  107. *
  108. * On a cache hit: The hit-block is assigned the current generation counter,
  109. * indicating that it is the most recently used block.
  110. *
  111. * On a cache miss: The block with the least priority is searched and replaced
  112. * with the newly-cached block, of which the priority is set to the current
  113. * generation number.
  114. */
  115. static void lru_priorities_init(Cache *cache)
  116. {
  117. int i;
  118. for (i = 0; i < cache->num_sets; i++) {
  119. cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
  120. cache->sets[i].lru_gen_counter = 0;
  121. }
  122. }
  123. static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
  124. {
  125. CacheSet *set = &cache->sets[set_idx];
  126. set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter;
  127. set->lru_gen_counter++;
  128. }
  129. static int lru_get_lru_block(Cache *cache, int set_idx)
  130. {
  131. int i, min_idx, min_priority;
  132. min_priority = cache->sets[set_idx].lru_priorities[0];
  133. min_idx = 0;
  134. for (i = 1; i < cache->assoc; i++) {
  135. if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
  136. min_priority = cache->sets[set_idx].lru_priorities[i];
  137. min_idx = i;
  138. }
  139. }
  140. return min_idx;
  141. }
  142. static void lru_priorities_destroy(Cache *cache)
  143. {
  144. int i;
  145. for (i = 0; i < cache->num_sets; i++) {
  146. g_free(cache->sets[i].lru_priorities);
  147. }
  148. }
  149. /*
  150. * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
  151. * stores accesses to the cache.
  152. *
  153. * On a compulsory miss: The block index is enqueued to the fifo_queue to
  154. * indicate that it's the latest cached block.
  155. *
  156. * On a conflict miss: The first-in block is removed from the cache and the new
  157. * block is put in its place and enqueued to the FIFO queue.
  158. */
  159. static void fifo_init(Cache *cache)
  160. {
  161. int i;
  162. for (i = 0; i < cache->num_sets; i++) {
  163. cache->sets[i].fifo_queue = g_queue_new();
  164. }
  165. }
  166. static int fifo_get_first_block(Cache *cache, int set)
  167. {
  168. GQueue *q = cache->sets[set].fifo_queue;
  169. return GPOINTER_TO_INT(g_queue_pop_tail(q));
  170. }
  171. static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
  172. {
  173. GQueue *q = cache->sets[set].fifo_queue;
  174. g_queue_push_head(q, (gpointer)(intptr_t) blk_idx);
  175. }
  176. static void fifo_destroy(Cache *cache)
  177. {
  178. int i;
  179. for (i = 0; i < cache->num_sets; i++) {
  180. g_queue_free(cache->sets[i].fifo_queue);
  181. }
  182. }
  183. static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
  184. {
  185. return addr & cache->tag_mask;
  186. }
  187. static inline uint64_t extract_set(Cache *cache, uint64_t addr)
  188. {
  189. return (addr & cache->set_mask) >> cache->blksize_shift;
  190. }
  191. static const char *cache_config_error(int blksize, int assoc, int cachesize)
  192. {
  193. if (cachesize % blksize != 0) {
  194. return "cache size must be divisible by block size";
  195. } else if (cachesize % (blksize * assoc) != 0) {
  196. return "cache size must be divisible by set size (assoc * block size)";
  197. } else {
  198. return NULL;
  199. }
  200. }
  201. static bool bad_cache_params(int blksize, int assoc, int cachesize)
  202. {
  203. return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
  204. }
  205. static Cache *cache_init(int blksize, int assoc, int cachesize)
  206. {
  207. Cache *cache;
  208. int i;
  209. uint64_t blk_mask;
  210. /*
  211. * This function shall not be called directly, and hence expects suitable
  212. * parameters.
  213. */
  214. g_assert(!bad_cache_params(blksize, assoc, cachesize));
  215. cache = g_new(Cache, 1);
  216. cache->assoc = assoc;
  217. cache->cachesize = cachesize;
  218. cache->num_sets = cachesize / (blksize * assoc);
  219. cache->sets = g_new(CacheSet, cache->num_sets);
  220. cache->blksize_shift = pow_of_two(blksize);
  221. cache->accesses = 0;
  222. cache->misses = 0;
  223. for (i = 0; i < cache->num_sets; i++) {
  224. cache->sets[i].blocks = g_new0(CacheBlock, assoc);
  225. }
  226. blk_mask = blksize - 1;
  227. cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
  228. cache->tag_mask = ~(cache->set_mask | blk_mask);
  229. if (metadata_init) {
  230. metadata_init(cache);
  231. }
  232. return cache;
  233. }
  234. static Cache **caches_init(int blksize, int assoc, int cachesize)
  235. {
  236. Cache **caches;
  237. int i;
  238. if (bad_cache_params(blksize, assoc, cachesize)) {
  239. return NULL;
  240. }
  241. caches = g_new(Cache *, cores);
  242. for (i = 0; i < cores; i++) {
  243. caches[i] = cache_init(blksize, assoc, cachesize);
  244. }
  245. return caches;
  246. }
  247. static int get_invalid_block(Cache *cache, uint64_t set)
  248. {
  249. int i;
  250. for (i = 0; i < cache->assoc; i++) {
  251. if (!cache->sets[set].blocks[i].valid) {
  252. return i;
  253. }
  254. }
  255. return -1;
  256. }
  257. static int get_replaced_block(Cache *cache, int set)
  258. {
  259. switch (policy) {
  260. case RAND:
  261. return g_rand_int_range(rng, 0, cache->assoc);
  262. case LRU:
  263. return lru_get_lru_block(cache, set);
  264. case FIFO:
  265. return fifo_get_first_block(cache, set);
  266. default:
  267. g_assert_not_reached();
  268. }
  269. }
  270. static int in_cache(Cache *cache, uint64_t addr)
  271. {
  272. int i;
  273. uint64_t tag, set;
  274. tag = extract_tag(cache, addr);
  275. set = extract_set(cache, addr);
  276. for (i = 0; i < cache->assoc; i++) {
  277. if (cache->sets[set].blocks[i].tag == tag &&
  278. cache->sets[set].blocks[i].valid) {
  279. return i;
  280. }
  281. }
  282. return -1;
  283. }
  284. /**
  285. * access_cache(): Simulate a cache access
  286. * @cache: The cache under simulation
  287. * @addr: The address of the requested memory location
  288. *
  289. * Returns true if the requested data is hit in the cache and false when missed.
  290. * The cache is updated on miss for the next access.
  291. */
  292. static bool access_cache(Cache *cache, uint64_t addr)
  293. {
  294. int hit_blk, replaced_blk;
  295. uint64_t tag, set;
  296. tag = extract_tag(cache, addr);
  297. set = extract_set(cache, addr);
  298. hit_blk = in_cache(cache, addr);
  299. if (hit_blk != -1) {
  300. if (update_hit) {
  301. update_hit(cache, set, hit_blk);
  302. }
  303. return true;
  304. }
  305. replaced_blk = get_invalid_block(cache, set);
  306. if (replaced_blk == -1) {
  307. replaced_blk = get_replaced_block(cache, set);
  308. }
  309. if (update_miss) {
  310. update_miss(cache, set, replaced_blk);
  311. }
  312. cache->sets[set].blocks[replaced_blk].tag = tag;
  313. cache->sets[set].blocks[replaced_blk].valid = true;
  314. return false;
  315. }
  316. static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
  317. uint64_t vaddr, void *userdata)
  318. {
  319. uint64_t effective_addr;
  320. struct qemu_plugin_hwaddr *hwaddr;
  321. int cache_idx;
  322. InsnData *insn;
  323. bool hit_in_l1;
  324. hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
  325. if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
  326. return;
  327. }
  328. effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
  329. cache_idx = vcpu_index % cores;
  330. g_mutex_lock(&l1_dcache_locks[cache_idx]);
  331. hit_in_l1 = access_cache(l1_dcaches[cache_idx], effective_addr);
  332. if (!hit_in_l1) {
  333. insn = userdata;
  334. __atomic_fetch_add(&insn->l1_dmisses, 1, __ATOMIC_SEQ_CST);
  335. l1_dcaches[cache_idx]->misses++;
  336. }
  337. l1_dcaches[cache_idx]->accesses++;
  338. g_mutex_unlock(&l1_dcache_locks[cache_idx]);
  339. if (hit_in_l1 || !use_l2) {
  340. /* No need to access L2 */
  341. return;
  342. }
  343. g_mutex_lock(&l2_ucache_locks[cache_idx]);
  344. if (!access_cache(l2_ucaches[cache_idx], effective_addr)) {
  345. insn = userdata;
  346. __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
  347. l2_ucaches[cache_idx]->misses++;
  348. }
  349. l2_ucaches[cache_idx]->accesses++;
  350. g_mutex_unlock(&l2_ucache_locks[cache_idx]);
  351. }
  352. static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
  353. {
  354. uint64_t insn_addr;
  355. InsnData *insn;
  356. int cache_idx;
  357. bool hit_in_l1;
  358. insn_addr = ((InsnData *) userdata)->addr;
  359. cache_idx = vcpu_index % cores;
  360. g_mutex_lock(&l1_icache_locks[cache_idx]);
  361. hit_in_l1 = access_cache(l1_icaches[cache_idx], insn_addr);
  362. if (!hit_in_l1) {
  363. insn = userdata;
  364. __atomic_fetch_add(&insn->l1_imisses, 1, __ATOMIC_SEQ_CST);
  365. l1_icaches[cache_idx]->misses++;
  366. }
  367. l1_icaches[cache_idx]->accesses++;
  368. g_mutex_unlock(&l1_icache_locks[cache_idx]);
  369. if (hit_in_l1 || !use_l2) {
  370. /* No need to access L2 */
  371. return;
  372. }
  373. g_mutex_lock(&l2_ucache_locks[cache_idx]);
  374. if (!access_cache(l2_ucaches[cache_idx], insn_addr)) {
  375. insn = userdata;
  376. __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
  377. l2_ucaches[cache_idx]->misses++;
  378. }
  379. l2_ucaches[cache_idx]->accesses++;
  380. g_mutex_unlock(&l2_ucache_locks[cache_idx]);
  381. }
  382. static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
  383. {
  384. size_t n_insns;
  385. size_t i;
  386. InsnData *data;
  387. n_insns = qemu_plugin_tb_n_insns(tb);
  388. for (i = 0; i < n_insns; i++) {
  389. struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
  390. uint64_t effective_addr = sys ? (uintptr_t) qemu_plugin_insn_haddr(insn) :
  391. qemu_plugin_insn_vaddr(insn);
  392. /*
  393. * Instructions might get translated multiple times, we do not create
  394. * new entries for those instructions. Instead, we fetch the same
  395. * entry from the hash table and register it for the callback again.
  396. */
  397. g_mutex_lock(&hashtable_lock);
  398. data = g_hash_table_lookup(miss_ht, &effective_addr);
  399. if (data == NULL) {
  400. data = g_new0(InsnData, 1);
  401. data->disas_str = qemu_plugin_insn_disas(insn);
  402. data->symbol = qemu_plugin_insn_symbol(insn);
  403. data->addr = effective_addr;
  404. g_hash_table_insert(miss_ht, &data->addr, data);
  405. }
  406. g_mutex_unlock(&hashtable_lock);
  407. qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
  408. QEMU_PLUGIN_CB_NO_REGS,
  409. rw, data);
  410. qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
  411. QEMU_PLUGIN_CB_NO_REGS, data);
  412. }
  413. }
  414. static void insn_free(gpointer data)
  415. {
  416. InsnData *insn = (InsnData *) data;
  417. g_free(insn->disas_str);
  418. g_free(insn);
  419. }
  420. static void cache_free(Cache *cache)
  421. {
  422. for (int i = 0; i < cache->num_sets; i++) {
  423. g_free(cache->sets[i].blocks);
  424. }
  425. if (metadata_destroy) {
  426. metadata_destroy(cache);
  427. }
  428. g_free(cache->sets);
  429. g_free(cache);
  430. }
  431. static void caches_free(Cache **caches)
  432. {
  433. int i;
  434. for (i = 0; i < cores; i++) {
  435. cache_free(caches[i]);
  436. }
  437. }
  438. static void append_stats_line(GString *line,
  439. uint64_t l1_daccess, uint64_t l1_dmisses,
  440. uint64_t l1_iaccess, uint64_t l1_imisses,
  441. uint64_t l2_access, uint64_t l2_misses)
  442. {
  443. double l1_dmiss_rate = ((double) l1_dmisses) / (l1_daccess) * 100.0;
  444. double l1_imiss_rate = ((double) l1_imisses) / (l1_iaccess) * 100.0;
  445. g_string_append_printf(line, "%-14" PRIu64 " %-12" PRIu64 " %9.4lf%%"
  446. " %-14" PRIu64 " %-12" PRIu64 " %9.4lf%%",
  447. l1_daccess,
  448. l1_dmisses,
  449. l1_daccess ? l1_dmiss_rate : 0.0,
  450. l1_iaccess,
  451. l1_imisses,
  452. l1_iaccess ? l1_imiss_rate : 0.0);
  453. if (l2_access && l2_misses) {
  454. double l2_miss_rate = ((double) l2_misses) / (l2_access) * 100.0;
  455. g_string_append_printf(line,
  456. " %-12" PRIu64 " %-11" PRIu64 " %10.4lf%%",
  457. l2_access,
  458. l2_misses,
  459. l2_miss_rate);
  460. }
  461. g_string_append(line, "\n");
  462. }
  463. static void sum_stats(void)
  464. {
  465. int i;
  466. g_assert(cores > 1);
  467. for (i = 0; i < cores; i++) {
  468. l1_imisses += l1_icaches[i]->misses;
  469. l1_dmisses += l1_dcaches[i]->misses;
  470. l1_imem_accesses += l1_icaches[i]->accesses;
  471. l1_dmem_accesses += l1_dcaches[i]->accesses;
  472. if (use_l2) {
  473. l2_misses += l2_ucaches[i]->misses;
  474. l2_mem_accesses += l2_ucaches[i]->accesses;
  475. }
  476. }
  477. }
  478. static int dcmp(gconstpointer a, gconstpointer b)
  479. {
  480. InsnData *insn_a = (InsnData *) a;
  481. InsnData *insn_b = (InsnData *) b;
  482. return insn_a->l1_dmisses < insn_b->l1_dmisses ? 1 : -1;
  483. }
  484. static int icmp(gconstpointer a, gconstpointer b)
  485. {
  486. InsnData *insn_a = (InsnData *) a;
  487. InsnData *insn_b = (InsnData *) b;
  488. return insn_a->l1_imisses < insn_b->l1_imisses ? 1 : -1;
  489. }
  490. static int l2_cmp(gconstpointer a, gconstpointer b)
  491. {
  492. InsnData *insn_a = (InsnData *) a;
  493. InsnData *insn_b = (InsnData *) b;
  494. return insn_a->l2_misses < insn_b->l2_misses ? 1 : -1;
  495. }
  496. static void log_stats(void)
  497. {
  498. int i;
  499. Cache *icache, *dcache, *l2_cache = NULL;
  500. g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
  501. " dmiss rate, insn accesses,"
  502. " insn misses, imiss rate");
  503. if (use_l2) {
  504. g_string_append(rep, ", l2 accesses, l2 misses, l2 miss rate");
  505. }
  506. g_string_append(rep, "\n");
  507. for (i = 0; i < cores; i++) {
  508. g_string_append_printf(rep, "%-8d", i);
  509. dcache = l1_dcaches[i];
  510. icache = l1_icaches[i];
  511. l2_cache = use_l2 ? l2_ucaches[i] : NULL;
  512. append_stats_line(rep, dcache->accesses, dcache->misses,
  513. icache->accesses, icache->misses,
  514. l2_cache ? l2_cache->accesses : 0,
  515. l2_cache ? l2_cache->misses : 0);
  516. }
  517. if (cores > 1) {
  518. sum_stats();
  519. g_string_append_printf(rep, "%-8s", "sum");
  520. append_stats_line(rep, l1_dmem_accesses, l1_dmisses,
  521. l1_imem_accesses, l1_imisses,
  522. l2_cache ? l2_mem_accesses : 0, l2_cache ? l2_misses : 0);
  523. }
  524. g_string_append(rep, "\n");
  525. qemu_plugin_outs(rep->str);
  526. }
  527. static void log_top_insns(void)
  528. {
  529. int i;
  530. GList *curr, *miss_insns;
  531. InsnData *insn;
  532. miss_insns = g_hash_table_get_values(miss_ht);
  533. miss_insns = g_list_sort(miss_insns, dcmp);
  534. g_autoptr(GString) rep = g_string_new("");
  535. g_string_append_printf(rep, "%s", "address, data misses, instruction\n");
  536. for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
  537. insn = (InsnData *) curr->data;
  538. g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
  539. if (insn->symbol) {
  540. g_string_append_printf(rep, " (%s)", insn->symbol);
  541. }
  542. g_string_append_printf(rep, ", %" PRId64 ", %s\n",
  543. insn->l1_dmisses, insn->disas_str);
  544. }
  545. miss_insns = g_list_sort(miss_insns, icmp);
  546. g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");
  547. for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
  548. insn = (InsnData *) curr->data;
  549. g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
  550. if (insn->symbol) {
  551. g_string_append_printf(rep, " (%s)", insn->symbol);
  552. }
  553. g_string_append_printf(rep, ", %" PRId64 ", %s\n",
  554. insn->l1_imisses, insn->disas_str);
  555. }
  556. if (!use_l2) {
  557. goto finish;
  558. }
  559. miss_insns = g_list_sort(miss_insns, l2_cmp);
  560. g_string_append_printf(rep, "%s", "\naddress, L2 misses, instruction\n");
  561. for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
  562. insn = (InsnData *) curr->data;
  563. g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
  564. if (insn->symbol) {
  565. g_string_append_printf(rep, " (%s)", insn->symbol);
  566. }
  567. g_string_append_printf(rep, ", %" PRId64 ", %s\n",
  568. insn->l2_misses, insn->disas_str);
  569. }
  570. finish:
  571. qemu_plugin_outs(rep->str);
  572. g_list_free(miss_insns);
  573. }
  574. static void plugin_exit(qemu_plugin_id_t id, void *p)
  575. {
  576. log_stats();
  577. log_top_insns();
  578. caches_free(l1_dcaches);
  579. caches_free(l1_icaches);
  580. g_free(l1_dcache_locks);
  581. g_free(l1_icache_locks);
  582. if (use_l2) {
  583. caches_free(l2_ucaches);
  584. g_free(l2_ucache_locks);
  585. }
  586. g_hash_table_destroy(miss_ht);
  587. }
  588. static void policy_init(void)
  589. {
  590. switch (policy) {
  591. case LRU:
  592. update_hit = lru_update_blk;
  593. update_miss = lru_update_blk;
  594. metadata_init = lru_priorities_init;
  595. metadata_destroy = lru_priorities_destroy;
  596. break;
  597. case FIFO:
  598. update_miss = fifo_update_on_miss;
  599. metadata_init = fifo_init;
  600. metadata_destroy = fifo_destroy;
  601. break;
  602. case RAND:
  603. rng = g_rand_new();
  604. break;
  605. default:
  606. g_assert_not_reached();
  607. }
  608. }
  609. QEMU_PLUGIN_EXPORT
  610. int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
  611. int argc, char **argv)
  612. {
  613. int i;
  614. int l1_iassoc, l1_iblksize, l1_icachesize;
  615. int l1_dassoc, l1_dblksize, l1_dcachesize;
  616. int l2_assoc, l2_blksize, l2_cachesize;
  617. limit = 32;
  618. sys = info->system_emulation;
  619. l1_dassoc = 8;
  620. l1_dblksize = 64;
  621. l1_dcachesize = l1_dblksize * l1_dassoc * 32;
  622. l1_iassoc = 8;
  623. l1_iblksize = 64;
  624. l1_icachesize = l1_iblksize * l1_iassoc * 32;
  625. l2_assoc = 16;
  626. l2_blksize = 64;
  627. l2_cachesize = l2_assoc * l2_blksize * 2048;
  628. policy = LRU;
  629. cores = sys ? info->system.smp_vcpus : 1;
  630. for (i = 0; i < argc; i++) {
  631. char *opt = argv[i];
  632. g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
  633. if (g_strcmp0(tokens[0], "iblksize") == 0) {
  634. l1_iblksize = STRTOLL(tokens[1]);
  635. } else if (g_strcmp0(tokens[0], "iassoc") == 0) {
  636. l1_iassoc = STRTOLL(tokens[1]);
  637. } else if (g_strcmp0(tokens[0], "icachesize") == 0) {
  638. l1_icachesize = STRTOLL(tokens[1]);
  639. } else if (g_strcmp0(tokens[0], "dblksize") == 0) {
  640. l1_dblksize = STRTOLL(tokens[1]);
  641. } else if (g_strcmp0(tokens[0], "dassoc") == 0) {
  642. l1_dassoc = STRTOLL(tokens[1]);
  643. } else if (g_strcmp0(tokens[0], "dcachesize") == 0) {
  644. l1_dcachesize = STRTOLL(tokens[1]);
  645. } else if (g_strcmp0(tokens[0], "limit") == 0) {
  646. limit = STRTOLL(tokens[1]);
  647. } else if (g_strcmp0(tokens[0], "cores") == 0) {
  648. cores = STRTOLL(tokens[1]);
  649. } else if (g_strcmp0(tokens[0], "l2cachesize") == 0) {
  650. use_l2 = true;
  651. l2_cachesize = STRTOLL(tokens[1]);
  652. } else if (g_strcmp0(tokens[0], "l2blksize") == 0) {
  653. use_l2 = true;
  654. l2_blksize = STRTOLL(tokens[1]);
  655. } else if (g_strcmp0(tokens[0], "l2assoc") == 0) {
  656. use_l2 = true;
  657. l2_assoc = STRTOLL(tokens[1]);
  658. } else if (g_strcmp0(tokens[0], "l2") == 0) {
  659. if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &use_l2)) {
  660. fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
  661. return -1;
  662. }
  663. } else if (g_strcmp0(tokens[0], "evict") == 0) {
  664. if (g_strcmp0(tokens[1], "rand") == 0) {
  665. policy = RAND;
  666. } else if (g_strcmp0(tokens[1], "lru") == 0) {
  667. policy = LRU;
  668. } else if (g_strcmp0(tokens[1], "fifo") == 0) {
  669. policy = FIFO;
  670. } else {
  671. fprintf(stderr, "invalid eviction policy: %s\n", opt);
  672. return -1;
  673. }
  674. } else {
  675. fprintf(stderr, "option parsing failed: %s\n", opt);
  676. return -1;
  677. }
  678. }
  679. policy_init();
  680. l1_dcaches = caches_init(l1_dblksize, l1_dassoc, l1_dcachesize);
  681. if (!l1_dcaches) {
  682. const char *err = cache_config_error(l1_dblksize, l1_dassoc, l1_dcachesize);
  683. fprintf(stderr, "dcache cannot be constructed from given parameters\n");
  684. fprintf(stderr, "%s\n", err);
  685. return -1;
  686. }
  687. l1_icaches = caches_init(l1_iblksize, l1_iassoc, l1_icachesize);
  688. if (!l1_icaches) {
  689. const char *err = cache_config_error(l1_iblksize, l1_iassoc, l1_icachesize);
  690. fprintf(stderr, "icache cannot be constructed from given parameters\n");
  691. fprintf(stderr, "%s\n", err);
  692. return -1;
  693. }
  694. l2_ucaches = use_l2 ? caches_init(l2_blksize, l2_assoc, l2_cachesize) : NULL;
  695. if (!l2_ucaches && use_l2) {
  696. const char *err = cache_config_error(l2_blksize, l2_assoc, l2_cachesize);
  697. fprintf(stderr, "L2 cache cannot be constructed from given parameters\n");
  698. fprintf(stderr, "%s\n", err);
  699. return -1;
  700. }
  701. l1_dcache_locks = g_new0(GMutex, cores);
  702. l1_icache_locks = g_new0(GMutex, cores);
  703. l2_ucache_locks = use_l2 ? g_new0(GMutex, cores) : NULL;
  704. qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
  705. qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
  706. miss_ht = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, insn_free);
  707. return 0;
  708. }