瀏覽代碼

[MCA] Show aggregate over Average Wait times for the whole snippet (PR43219)

Summary:
As discussed in https://bugs.llvm.org/show_bug.cgi?id=43219,
I believe it may be somewhat useful to show //some// aggregates
over the whole sea of statistics provided.

Example:
```
Average Wait times (based on the timeline view):
[0]: Executions
[1]: Average time spent waiting in a scheduler's queue
[2]: Average time spent waiting in a scheduler's queue while ready
[3]: Average time elapsed from WB until retire stage

      [0]    [1]    [2]    [3]
0.     3     1.0    1.0    4.7       vmulps     %xmm0, %xmm1, %xmm2
1.     3     2.7    0.0    2.3       vhaddps    %xmm2, %xmm2, %xmm3
2.     3     6.0    0.0    0.0       vhaddps    %xmm3, %xmm3, %xmm4
       3     3.2    0.3    2.3       <total>
```
I.e. we average the averages.

Reviewers: andreadb, mattd, RKSimon

Reviewed By: andreadb

Subscribers: gbedwell, arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68714

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374361 91177308-0d34-0410-b5e6-96231b3b80d8
Roman Lebedev 5 年之前
父節點
當前提交
84f8a5f06c
共有 100 個文件被更改,包括 147 次插入1 次删除
  1. 3 1
      docs/CommandGuide/llvm-mca.rst
  2. 1 0
      test/tools/llvm-mca/ARM/memcpy-ldm-stm.s
  3. 1 0
      test/tools/llvm-mca/ARM/vld1-index-update.s
  4. 1 0
      test/tools/llvm-mca/SystemZ/stm-lm.s
  5. 1 0
      test/tools/llvm-mca/X86/Barcelona/clear-super-register-1.s
  6. 2 0
      test/tools/llvm-mca/X86/Barcelona/clear-super-register-2.s
  7. 1 0
      test/tools/llvm-mca/X86/Barcelona/dependency-breaking-cmp.s
  8. 1 0
      test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpeq.s
  9. 1 0
      test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpgt.s
  10. 1 0
      test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-1.s
  11. 1 0
      test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-2.s
  12. 1 0
      test/tools/llvm-mca/X86/Barcelona/int-to-fpu-forwarding-3.s
  13. 6 0
      test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s
  14. 6 0
      test/tools/llvm-mca/X86/Barcelona/load-throughput.s
  15. 1 0
      test/tools/llvm-mca/X86/Barcelona/one-idioms.s
  16. 1 0
      test/tools/llvm-mca/X86/Barcelona/partial-reg-update-2.s
  17. 1 0
      test/tools/llvm-mca/X86/Barcelona/partial-reg-update-3.s
  18. 1 0
      test/tools/llvm-mca/X86/Barcelona/partial-reg-update-4.s
  19. 1 0
      test/tools/llvm-mca/X86/Barcelona/partial-reg-update-6.s
  20. 1 0
      test/tools/llvm-mca/X86/Barcelona/partial-reg-update-7.s
  21. 1 0
      test/tools/llvm-mca/X86/Barcelona/partial-reg-update.s
  22. 1 0
      test/tools/llvm-mca/X86/Barcelona/read-advance-1.s
  23. 1 0
      test/tools/llvm-mca/X86/Barcelona/read-advance-2.s
  24. 1 0
      test/tools/llvm-mca/X86/Barcelona/read-advance-3.s
  25. 1 0
      test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-1.s
  26. 1 0
      test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-2.s
  27. 1 0
      test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-3.s
  28. 1 0
      test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-4.s
  29. 1 0
      test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-5.s
  30. 1 0
      test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-6.s
  31. 6 0
      test/tools/llvm-mca/X86/Barcelona/store-throughput.s
  32. 1 0
      test/tools/llvm-mca/X86/Barcelona/zero-idioms.s
  33. 1 0
      test/tools/llvm-mca/X86/BdVer2/add-sequence.s
  34. 1 0
      test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s
  35. 1 0
      test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s
  36. 2 0
      test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s
  37. 1 0
      test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s
  38. 1 0
      test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s
  39. 1 0
      test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s
  40. 1 0
      test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s
  41. 1 0
      test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s
  42. 1 0
      test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s
  43. 1 0
      test/tools/llvm-mca/X86/BdVer2/dot-product.s
  44. 1 0
      test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s
  45. 1 0
      test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s
  46. 1 0
      test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s
  47. 1 0
      test/tools/llvm-mca/X86/BdVer2/load-store-alias.s
  48. 6 0
      test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s
  49. 7 0
      test/tools/llvm-mca/X86/BdVer2/load-throughput.s
  50. 1 0
      test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s
  51. 1 0
      test/tools/llvm-mca/X86/BdVer2/one-idioms.s
  52. 1 0
      test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s
  53. 1 0
      test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s
  54. 1 0
      test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s
  55. 1 0
      test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s
  56. 1 0
      test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s
  57. 1 0
      test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s
  58. 1 0
      test/tools/llvm-mca/X86/BdVer2/pr37790.s
  59. 1 0
      test/tools/llvm-mca/X86/BdVer2/rank.s
  60. 1 0
      test/tools/llvm-mca/X86/BdVer2/read-advance-1.s
  61. 1 0
      test/tools/llvm-mca/X86/BdVer2/read-advance-2.s
  62. 1 0
      test/tools/llvm-mca/X86/BdVer2/read-advance-3.s
  63. 1 0
      test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s
  64. 1 0
      test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s
  65. 1 0
      test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s
  66. 1 0
      test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s
  67. 1 0
      test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s
  68. 1 0
      test/tools/llvm-mca/X86/BdVer2/register-files-1.s
  69. 1 0
      test/tools/llvm-mca/X86/BdVer2/register-files-2.s
  70. 1 0
      test/tools/llvm-mca/X86/BdVer2/register-files-5.s
  71. 7 0
      test/tools/llvm-mca/X86/BdVer2/store-throughput.s
  72. 1 0
      test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s
  73. 1 0
      test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s
  74. 1 0
      test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s
  75. 1 0
      test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s
  76. 1 0
      test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s
  77. 5 0
      test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s
  78. 1 0
      test/tools/llvm-mca/X86/BdVer2/zero-idioms.s
  79. 1 0
      test/tools/llvm-mca/X86/Broadwell/zero-idioms.s
  80. 1 0
      test/tools/llvm-mca/X86/BtVer2/add-sequence.s
  81. 1 0
      test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-1.s
  82. 1 0
      test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s
  83. 1 0
      test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s
  84. 1 0
      test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s
  85. 8 0
      test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s
  86. 1 0
      test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s
  87. 1 0
      test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s
  88. 1 0
      test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s
  89. 1 0
      test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s
  90. 1 0
      test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s
  91. 1 0
      test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s
  92. 1 0
      test/tools/llvm-mca/X86/BtVer2/dot-product.s
  93. 1 0
      test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s
  94. 1 0
      test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s
  95. 1 0
      test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s
  96. 1 0
      test/tools/llvm-mca/X86/BtVer2/load-store-alias.s
  97. 1 0
      test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s
  98. 1 0
      test/tools/llvm-mca/X86/BtVer2/one-idioms.s
  99. 1 0
      test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s
  100. 1 0
      test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s

+ 3 - 1
docs/CommandGuide/llvm-mca.rst

@@ -523,6 +523,7 @@ Below is the timeline view for a subset of the dot-product example located in
   0.     3     1.0    1.0    3.3       vmulps	%xmm0, %xmm1, %xmm2
   1.     3     3.3    0.7    1.0       vhaddps	%xmm2, %xmm2, %xmm3
   2.     3     5.7    0.0    0.0       vhaddps	%xmm3, %xmm3, %xmm4
+         3     3.3    0.5    1.4       <total>
 
 The timeline view is interesting because it shows instruction state changes
 during execution.  It also gives an idea of how the tool processes instructions
@@ -574,7 +575,8 @@ and therefore consuming physical registers).
 
 Table *Average Wait times* helps diagnose performance issues that are caused by
 the presence of long latency instructions and potentially long data dependencies
-which may limit the ILP.  Note that :program:`llvm-mca`, by default, assumes at
+which may limit the ILP. Last row, ``<total>``, shows a global average over all
+instructions measured. Note that :program:`llvm-mca`, by default, assumes at
 least 1cy between the dispatch event and the issue event.
 
 When the performance is limited by data dependencies and/or long latency

+ 1 - 0
test/tools/llvm-mca/ARM/memcpy-ldm-stm.s

@@ -63,3 +63,4 @@
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     1.0    1.0    0.0       ldm	r2!, {r3, r4, r5, r6, r12, lr}
 # CHECK-NEXT: 1.     3     18.3   0.3    0.0       stm	r0!, {r3, r4, r5, r6, r12, lr}
+# CHECK-NEXT:        3     9.7    0.7    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/ARM/vld1-index-update.s

@@ -70,3 +70,4 @@ vld1.32	{d16, d17}, [r1]!
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     5     3.0    0.2    1.6       add	r1, r1, r12
 # CHECK-NEXT: 1.     5     4.0    0.0    0.0       vld1.32	{d16, d17}, [r1]!
+# CHECK-NEXT:        5     3.5    0.1    0.8       <total>

+ 1 - 0
test/tools/llvm-mca/SystemZ/stm-lm.s

@@ -70,3 +70,4 @@ lmg	%r6, %r15, 48(%r15)
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     9.7    0.3    0.0       stmg	%r6, %r15, 48(%r15)
 # CHECK-NEXT: 1.     3     9.0    0.3    0.0       lmg	%r6, %r15, 48(%r15)
+# CHECK-NEXT:        3     9.3    0.3    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/clear-super-register-1.s

@@ -61,3 +61,4 @@ bsf   %rax, %rcx
 # CHECK-NEXT: 1.     2     5.5    1.5    0.0       lzcntl	%ecx, %eax
 # CHECK-NEXT: 2.     2     8.5    0.0    0.0       andq	%rcx, %rax
 # CHECK-NEXT: 3.     2     9.5    0.0    0.0       bsfq	%rax, %rcx
+# CHECK-NEXT:        2     7.0    0.5    0.0       <total>

+ 2 - 0
test/tools/llvm-mca/X86/Barcelona/clear-super-register-2.s

@@ -66,6 +66,7 @@ addps  %xmm0, %xmm0
 # CHECK-NEXT: 0.     3     14.7   8.0    0.0       sqrtss	%xmm0, %xmm0
 # CHECK-NEXT: 1.     3     1.0    1.0    21.3      movss	(%eax), %xmm0
 # CHECK-NEXT: 2.     3     7.0    0.3    18.0      addps	%xmm0, %xmm0
+# CHECK-NEXT:        3     7.6    3.1    13.1      <total>
 
 # CHECK:      [1] Code Region
 
@@ -116,3 +117,4 @@ addps  %xmm0, %xmm0
 # CHECK-NEXT: 0.     3     21.7   15.0   0.0       sqrtsd	%xmm0, %xmm0
 # CHECK-NEXT: 1.     3     1.0    1.0    35.3      movsd	(%eax), %xmm0
 # CHECK-NEXT: 2.     3     7.0    0.3    32.0      addps	%xmm0, %xmm0
+# CHECK-NEXT:        3     9.9    5.4    22.4      <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/dependency-breaking-cmp.s

@@ -68,3 +68,4 @@ cmovae %ebx, %eax
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     3.7    0.3    0.0       cmpl	%eax, %eax
 # CHECK-NEXT: 1.     3     4.0    0.0    0.0       cmovael	%ebx, %eax
+# CHECK-NEXT:        3     3.8    0.2    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpeq.s

@@ -105,3 +105,4 @@ pcmpeqw %xmm0, %xmm0
 # CHECK-NEXT: 4.     3     3.7    0.0    12.0      pcmpeqd	%xmm0, %xmm0
 # CHECK-NEXT: 5.     3     4.3    0.0    11.0      pcmpeqq	%xmm0, %xmm0
 # CHECK-NEXT: 6.     3     5.0    0.0    10.0      pcmpeqw	%xmm0, %xmm0
+# CHECK-NEXT:        3     7.1    0.1    6.6       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/dependency-breaking-pcmpgt.s

@@ -106,3 +106,4 @@ pcmpgtw %xmm0, %xmm0
 # CHECK-NEXT: 4.     3     0.0    0.0    16.7      pcmpgtd	%xmm0, %xmm0
 # CHECK-NEXT: 5.     3     0.0    0.0    16.3      pcmpgtq	%xmm0, %xmm0
 # CHECK-NEXT: 6.     3     0.0    0.0    16.0      pcmpgtw	%xmm0, %xmm0
+# CHECK-NEXT:        3     4.9    0.0    9.4       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-1.s

@@ -69,3 +69,4 @@ sbb %eax, %eax
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     4.0    0.3    0.0       sbbl	%edx, %edx
 # CHECK-NEXT: 1.     3     6.0    0.0    0.0       sbbl	%eax, %eax
+# CHECK-NEXT:        3     5.0    0.2    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/dependency-breaking-sbb-2.s

@@ -76,3 +76,4 @@ sbb %eax, %eax
 # CHECK-NEXT: 0.     3     5.0    0.3    0.0       imull	%edx, %eax
 # CHECK-NEXT: 1.     3     1.0    0.3    6.0       addl	%edx, %edx
 # CHECK-NEXT: 2.     3     8.0    0.0    0.0       sbbl	%eax, %eax
+# CHECK-NEXT:        3     4.7    0.2    2.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/int-to-fpu-forwarding-3.s

@@ -72,3 +72,4 @@ pinsrw $1, %eax, %xmm0
 # CHECK-NEXT: 0.     3     1.0    0.7    2.7       addl	%eax, %eax
 # CHECK-NEXT: 1.     3     4.3    0.0    0.0       pinsrw	$0, %eax, %xmm0
 # CHECK-NEXT: 2.     3     5.7    0.0    0.0       pinsrw	$1, %eax, %xmm0
+# CHECK-NEXT:        3     3.7    0.2    0.9       <total>

+ 6 - 0
test/tools/llvm-mca/X86/Barcelona/load-store-throughput.s

@@ -138,6 +138,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movb	(%rcx), %bpl
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movb	(%rdx), %sil
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movb	%dil, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [1] Code Region
 
@@ -234,6 +235,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movw	(%rcx), %bp
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movw	(%rdx), %si
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movw	%di, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [2] Code Region
 
@@ -330,6 +332,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movl	(%rcx), %ebp
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movl	(%rdx), %esi
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movl	%edi, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [3] Code Region
 
@@ -426,6 +429,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movq	(%rcx), %rbp
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movq	(%rdx), %rsi
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movq	%rdi, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [4] Code Region
 
@@ -522,6 +526,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movd	(%rcx), %mm1
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movd	(%rdx), %mm2
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movd	%mm3, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [5] Code Region
 
@@ -619,3 +624,4 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movaps	(%rcx), %xmm1
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movaps	(%rdx), %xmm2
 # CHECK-NEXT: 3.     1     8.0    0.0    0.0       movaps	%xmm3, (%rbx)
+# CHECK-NEXT:        1     3.0    1.0    0.0       <total>

+ 6 - 0
test/tools/llvm-mca/X86/Barcelona/load-throughput.s

@@ -137,6 +137,7 @@ movaps (%rbx), %xmm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movb	(%rcx), %bpl
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movb	(%rdx), %sil
 # CHECK-NEXT: 3.     1     2.0    2.0    0.0       movb	(%rbx), %dil
+# CHECK-NEXT:        1     1.5    1.5    0.0       <total>
 
 # CHECK:      [1] Code Region
 
@@ -232,6 +233,7 @@ movaps (%rbx), %xmm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movw	(%rcx), %bp
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movw	(%rdx), %si
 # CHECK-NEXT: 3.     1     2.0    2.0    0.0       movw	(%rbx), %di
+# CHECK-NEXT:        1     1.5    1.5    0.0       <total>
 
 # CHECK:      [2] Code Region
 
@@ -327,6 +329,7 @@ movaps (%rbx), %xmm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movl	(%rcx), %ebp
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movl	(%rdx), %esi
 # CHECK-NEXT: 3.     1     2.0    2.0    0.0       movl	(%rbx), %edi
+# CHECK-NEXT:        1     1.5    1.5    0.0       <total>
 
 # CHECK:      [3] Code Region
 
@@ -422,6 +425,7 @@ movaps (%rbx), %xmm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movq	(%rcx), %rbp
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movq	(%rdx), %rsi
 # CHECK-NEXT: 3.     1     2.0    2.0    0.0       movq	(%rbx), %rdi
+# CHECK-NEXT:        1     1.5    1.5    0.0       <total>
 
 # CHECK:      [4] Code Region
 
@@ -517,6 +521,7 @@ movaps (%rbx), %xmm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movd	(%rcx), %mm1
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movd	(%rdx), %mm2
 # CHECK-NEXT: 3.     1     2.0    2.0    0.0       movd	(%rbx), %mm3
+# CHECK-NEXT:        1     1.5    1.5    0.0       <total>
 
 # CHECK:      [5] Code Region
 
@@ -612,3 +617,4 @@ movaps (%rbx), %xmm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movaps	(%rcx), %xmm1
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movaps	(%rdx), %xmm2
 # CHECK-NEXT: 3.     1     2.0    2.0    0.0       movaps	(%rbx), %xmm3
+# CHECK-NEXT:        1     1.5    1.5    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/one-idioms.s

@@ -94,3 +94,4 @@ pcmpeqw %xmm2, %xmm2
 # CHECK-NEXT: 4.     1     1.0    0.0    7.0       pcmpeqd	%xmm2, %xmm2
 # CHECK-NEXT: 5.     1     2.0    0.0    6.0       pcmpeqq	%xmm2, %xmm2
 # CHECK-NEXT: 6.     1     3.0    0.0    5.0       pcmpeqw	%xmm2, %xmm2
+# CHECK-NEXT:        1     2.7    0.3    3.7       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/partial-reg-update-2.s

@@ -45,3 +45,4 @@ add    %ecx, %ebx
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulq	%rax, %rbx
 # CHECK-NEXT: 1.     1     2.0    2.0    0.0       lzcntw	%ax, %bx
 # CHECK-NEXT: 2.     1     5.0    0.0    0.0       addl	%ecx, %ebx
+# CHECK-NEXT:        1     2.7    1.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/partial-reg-update-3.s

@@ -74,3 +74,4 @@ xor %bx, %dx
 # CHECK-NEXT: 0.     3     2.3    0.3    0.0       addw	%cx, %dx
 # CHECK-NEXT: 1.     3     1.0    1.0    1.0       movw	%ax, %dx
 # CHECK-NEXT: 2.     3     1.7    0.0    0.3       xorw	%bx, %dx
+# CHECK-NEXT:        3     1.7    0.4    0.4       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/partial-reg-update-4.s

@@ -75,3 +75,4 @@ add %cx, %bx
 # CHECK-NEXT: 0.     3     4.3    0.3    0.0       imulw	%ax, %bx
 # CHECK-NEXT: 1.     3     2.3    2.3    2.0       lzcntw	%ax, %bx
 # CHECK-NEXT: 2.     3     5.0    0.0    1.3       addw	%cx, %bx
+# CHECK-NEXT:        3     3.9    0.9    1.1       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/partial-reg-update-6.s

@@ -77,3 +77,4 @@ lzcnt 2(%rsp), %cx
 # CHECK-NEXT: 0.     3     7.3    0.3    0.0       imull	%edx, %ecx
 # CHECK-NEXT: 1.     3     2.3    2.3    1.7       lzcntw	(%rsp), %cx
 # CHECK-NEXT: 2.     3     2.7    2.7    1.0       lzcntw	2(%rsp), %cx
+# CHECK-NEXT:        3     4.1    1.8    0.9       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/partial-reg-update-7.s

@@ -96,3 +96,4 @@ cmpl $1025, %eax
 # CHECK-NEXT: 2.     5     9.4    0.0    0.0       shll	$2, %eax
 # CHECK-NEXT: 3.     5     10.2   0.0    0.0       imull	%ecx, %eax
 # CHECK-NEXT: 4.     5     12.8   0.0    0.0       cmpl	$1025, %eax
+# CHECK-NEXT:        5     10.1   0.1    0.2       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/partial-reg-update.s

@@ -45,3 +45,4 @@ add  %ecx, %ebx
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulw	%ax, %cx
 # CHECK-NEXT: 1.     1     4.0    0.0    0.0       addb	%al, %cl
 # CHECK-NEXT: 2.     1     5.0    0.0    0.0       addl	%ecx, %ebx
+# CHECK-NEXT:        1     3.3    0.3    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/read-advance-1.s

@@ -46,3 +46,4 @@ mulps  (%rdi), %xmm1
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       addps	%xmm0, %xmm1
 # CHECK-NEXT: 1.     1     1.0    0.0    0.0       mulps	(%rdi), %xmm1
+# CHECK-NEXT:        1     1.0    0.5    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/read-advance-2.s

@@ -45,3 +45,4 @@ imull  (%rdi)
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imull	%esi
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       imull	(%rdi)
+# CHECK-NEXT:        1     1.0    1.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/read-advance-3.s

@@ -45,3 +45,4 @@ add %rdx, %r8
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       addq	%rdi, %rsi
 # CHECK-NEXT: 1.     1     1.0    0.0    0.0       addq	(%rsp), %rsi
 # CHECK-NEXT: 2.     1     1.0    1.0    4.0       addq	%rdx, %r8
+# CHECK-NEXT:        1     1.0    0.7    1.3       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-1.s

@@ -78,3 +78,4 @@ addps %xmm1, %xmm1
 # CHECK-NEXT: 0.     3     0.0    0.0    3.3       xorps	%xmm0, %xmm0
 # CHECK-NEXT: 1.     3     1.3    1.3    1.3       movaps	%xmm0, %xmm1
 # CHECK-NEXT: 2.     3     2.0    0.0    0.0       addps	%xmm1, %xmm1
+# CHECK-NEXT:        3     1.1    0.4    1.6       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-2.s

@@ -119,3 +119,4 @@ movdqu %xmm5, %xmm0
 # CHECK-NEXT: 6.     3     7.7    0.0    0.0       movupd	%xmm3, %xmm4
 # CHECK-NEXT: 7.     3     8.3    0.0    0.0       movdqa	%xmm4, %xmm5
 # CHECK-NEXT: 8.     3     9.0    0.0    0.0       movdqu	%xmm5, %xmm0
+# CHECK-NEXT:        3     5.7    0.2    0.9       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-3.s

@@ -104,3 +104,4 @@ movdqu %xmm5, %xmm0
 # CHECK-NEXT: 4.     3     7.7    0.0    0.0       movupd	%xmm3, %xmm4
 # CHECK-NEXT: 5.     3     8.3    0.0    0.0       movdqa	%xmm4, %xmm5
 # CHECK-NEXT: 6.     3     9.0    0.0    0.0       movdqu	%xmm5, %xmm0
+# CHECK-NEXT:        3     7.0    0.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-4.s

@@ -90,3 +90,4 @@ mov %edx, %eax
 # CHECK-NEXT: 2.     3     4.7    0.0    0.0       movl	%ebx, %ecx
 # CHECK-NEXT: 3.     3     5.3    0.0    0.0       movl	%ecx, %edx
 # CHECK-NEXT: 4.     3     6.0    0.0    0.0       movl	%edx, %eax
+# CHECK-NEXT:        3     4.7    0.1    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-5.s

@@ -90,3 +90,4 @@ mov %rdx, %rax
 # CHECK-NEXT: 2.     3     4.7    0.0    0.0       movq	%rbx, %rcx
 # CHECK-NEXT: 3.     3     5.3    0.0    0.0       movq	%rcx, %rdx
 # CHECK-NEXT: 4.     3     6.0    0.0    0.0       movq	%rdx, %rax
+# CHECK-NEXT:        3     4.7    0.1    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/reg-move-elimination-6.s

@@ -96,3 +96,4 @@ mov %esi, %ecx
 # CHECK-NEXT: 3.     3     3.0    0.0    0.0       addq	%rcx, %rcx
 # CHECK-NEXT: 4.     3     3.3    0.0    0.0       addq	%rcx, %rcx
 # CHECK-NEXT: 5.     3     1.0    1.0    2.3       movl	%esi, %ecx
+# CHECK-NEXT:        3     1.8    0.2    1.1       <total>

+ 6 - 0
test/tools/llvm-mca/X86/Barcelona/store-throughput.s

@@ -138,6 +138,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movb	%bpl, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movb	%sil, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movb	%dil, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>
 
 # CHECK:      [1] Code Region
 
@@ -234,6 +235,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movw	%bp, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movw	%si, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movw	%di, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>
 
 # CHECK:      [2] Code Region
 
@@ -330,6 +332,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movl	%ebp, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movl	%esi, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movl	%edi, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>
 
 # CHECK:      [3] Code Region
 
@@ -426,6 +429,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movq	%rbp, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movq	%rsi, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movq	%rdi, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>
 
 # CHECK:      [4] Code Region
 
@@ -522,6 +526,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movd	%mm1, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movd	%mm2, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movd	%mm3, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>
 
 # CHECK:      [5] Code Region
 
@@ -618,3 +623,4 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movaps	%xmm1, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movaps	%xmm2, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movaps	%xmm3, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Barcelona/zero-idioms.s

@@ -240,3 +240,4 @@ pxor   %xmm2, %xmm2
 # CHECK-NEXT: 32.    1     3.0    0.0    25.0      xorpd	%xmm1, %xmm1
 # CHECK-NEXT: 33.    1     28.0   0.0    0.0       pxor	%mm2, %mm2
 # CHECK-NEXT: 34.    1     3.0    0.0    26.0      pxor	%xmm2, %xmm2
+# CHECK-NEXT:        1     6.7    0.2    10.3      <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/add-sequence.s

@@ -108,3 +108,4 @@ add %eax, %edx
 # CHECK-NEXT: 0.     10    12.0   2.0    0.0       addl	%eax, %ecx
 # CHECK-NEXT: 1.     10    10.7   1.8    1.0       addl	%esi, %eax
 # CHECK-NEXT: 2.     10    12.5   1.0    0.0       addl	%eax, %edx
+# CHECK-NEXT:        10    11.7   1.6    0.3       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s

@@ -61,3 +61,4 @@ bsf   %rax, %rcx
 # CHECK-NEXT: 1.     2     4.0    2.0    2.5       lzcntl	%ecx, %eax
 # CHECK-NEXT: 2.     2     6.0    0.0    1.5       andq	%rcx, %rax
 # CHECK-NEXT: 3.     2     6.0    0.0    0.0       bsfq	%rax, %rcx
+# CHECK-NEXT:        2     4.8    0.6    1.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s

@@ -135,3 +135,4 @@ vandps %xmm4, %xmm1, %xmm0
 # CHECK-NEXT: 15.    2     29.5   18.5   0.0       vaddps	%ymm3, %ymm1, %ymm4
 # CHECK-NEXT: 16.    2     29.5   19.0   0.0       vaddps	%ymm3, %ymm1, %ymm4
 # CHECK-NEXT: 17.    2     34.5   0.0    0.0       vandps	%xmm4, %xmm1, %xmm0
+# CHECK-NEXT:        2     24.0   9.6    0.2       <total>

+ 2 - 0
test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s

@@ -63,6 +63,7 @@ addps  %xmm0, %xmm0
 # CHECK-NEXT: 0.     2     7.0    1.0    0.0       sqrtss	%xmm0, %xmm0
 # CHECK-NEXT: 1.     2     2.0    2.0    8.5       movss	(%eax), %xmm0
 # CHECK-NEXT: 2.     2     8.5    1.5    2.5       addps	%xmm0, %xmm0
+# CHECK-NEXT:        2     5.8    1.5    3.7       <total>
 
 # CHECK:      [1] Code Region
 
@@ -110,3 +111,4 @@ addps  %xmm0, %xmm0
 # CHECK-NEXT: 0.     2     7.0    1.0    0.0       sqrtsd	%xmm0, %xmm0
 # CHECK-NEXT: 1.     2     2.0    2.0    8.5       movsd	(%eax), %xmm0
 # CHECK-NEXT: 2.     2     8.5    1.5    2.5       addps	%xmm0, %xmm0
+# CHECK-NEXT:        2     5.8    1.5    3.7       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s

@@ -84,3 +84,4 @@ cmovae %ebx, %eax
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     1.3    1.3    1.0       cmpl	%eax, %eax
 # CHECK-NEXT: 1.     3     3.7    0.3    0.0       cmovael	%ebx, %eax
+# CHECK-NEXT:        3     2.5    0.8    0.5       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s

@@ -100,3 +100,4 @@ vpcmpeqq %xmm3, %xmm3, %xmm0
 # CHECK-NEXT: 1.     3     6.0    6.0    0.0       vpcmpeqw	%xmm1, %xmm1, %xmm2
 # CHECK-NEXT: 2.     3     4.0    4.0    2.0       vpcmpeqd	%xmm2, %xmm2, %xmm3
 # CHECK-NEXT: 3.     3     6.0    0.0    0.0       vpcmpeqq	%xmm3, %xmm3, %xmm0
+# CHECK-NEXT:        3     5.0    3.5    0.5       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s

@@ -100,3 +100,4 @@ vpcmpgtq %xmm3, %xmm3, %xmm0
 # CHECK-NEXT: 1.     3     0.0    0.0    1.3       vpcmpgtw	%xmm1, %xmm1, %xmm2
 # CHECK-NEXT: 2.     3     0.0    0.0    1.3       vpcmpgtd	%xmm2, %xmm2, %xmm3
 # CHECK-NEXT: 3.     3     1.0    1.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm0
+# CHECK-NEXT:        3     0.3    0.3    1.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s

@@ -85,3 +85,4 @@ sbb %eax, %eax
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     2.7    0.3    0.0       sbbl	%edx, %edx
 # CHECK-NEXT: 1.     3     3.7    0.0    0.0       sbbl	%eax, %eax
+# CHECK-NEXT:        3     3.2    0.2    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s

@@ -93,3 +93,4 @@ sbb %eax, %eax
 # CHECK-NEXT: 0.     3     5.7    2.0    0.0       imull	%edx, %eax
 # CHECK-NEXT: 1.     3     1.7    0.7    6.7       addl	%edx, %edx
 # CHECK-NEXT: 2.     3     5.0    2.7    3.0       sbbl	%eax, %eax
+# CHECK-NEXT:        3     4.1    1.8    3.2       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s

@@ -108,3 +108,4 @@ vpaddd %xmm0, %xmm0, %xmm3
 # CHECK-NEXT: 0.     10    25.0   0.1    0.0       vpmuldq	%xmm0, %xmm0, %xmm1
 # CHECK-NEXT: 1.     10    28.7   0.0    0.0       vpaddd	%xmm1, %xmm1, %xmm0
 # CHECK-NEXT: 2.     10    30.5   0.0    0.0       vpaddd	%xmm0, %xmm0, %xmm3
+# CHECK-NEXT:        10    28.1   0.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/dot-product.s

@@ -87,3 +87,4 @@ vhaddps  %xmm3, %xmm3, %xmm4
 # CHECK-NEXT: 0.     3     1.0    1.0    13.7      vmulps	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1.     3     6.0    0.7    5.7       vhaddps	%xmm2, %xmm2, %xmm3
 # CHECK-NEXT: 2.     3     16.0   0.0    0.0       vhaddps	%xmm3, %xmm3, %xmm4
+# CHECK-NEXT:        3     7.7    0.6    6.4       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s

@@ -42,3 +42,4 @@ vhaddps (%rdi), %xmm1, %xmm2
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       vshufps	$0, %xmm0, %xmm1, %xmm1
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       vhaddps	(%rdi), %xmm1, %xmm2
+# CHECK-NEXT:        1     1.0    1.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s

@@ -42,3 +42,4 @@ vhaddps (%rdi), %ymm1, %ymm2
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       vshufps	$0, %xmm0, %xmm1, %xmm1
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       vhaddps	(%rdi), %ymm1, %ymm2
+# CHECK-NEXT:        1     1.0    1.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s

@@ -87,3 +87,4 @@ vpinsrb $1, %eax, %xmm0, %xmm0
 # CHECK-NEXT: 0.     3     1.0    0.7    9.3       addl	%eax, %eax
 # CHECK-NEXT: 1.     3     14.3   0.0    0.0       vpinsrb	$0, %eax, %xmm0, %xmm0
 # CHECK-NEXT: 2.     3     15.7   0.0    0.0       vpinsrb	$1, %eax, %xmm0, %xmm0
+# CHECK-NEXT:        3     10.3   0.2    3.1       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/load-store-alias.s

@@ -106,3 +106,4 @@ vmovaps %xmm0, 48(%rdi)
 # CHECK-NEXT: 5.     1     17.0   0.0    0.0       vmovaps	%xmm0, 32(%rdi)
 # CHECK-NEXT: 6.     1     18.0   0.0    0.0       vmovaps	48(%rsi), %xmm0
 # CHECK-NEXT: 7.     1     23.0   0.0    0.0       vmovaps	%xmm0, 48(%rdi)
+# CHECK-NEXT:        1     12.0   0.1    0.0       <total>

+ 6 - 0
test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s

@@ -157,6 +157,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movb	(%rcx), %bpl
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movb	(%rdx), %sil
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movb	%dil, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [1] Code Region
 
@@ -272,6 +273,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movw	(%rcx), %bp
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movw	(%rdx), %si
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movw	%di, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [2] Code Region
 
@@ -387,6 +389,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movl	(%rcx), %ebp
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movl	(%rdx), %esi
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movl	%edi, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [3] Code Region
 
@@ -502,6 +505,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movq	(%rcx), %rbp
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movq	(%rdx), %rsi
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movq	%rdi, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [4] Code Region
 
@@ -619,6 +623,7 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movd	(%rcx), %mm1
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movd	(%rdx), %mm2
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movd	%mm3, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>
 
 # CHECK:      [5] Code Region
 
@@ -734,3 +739,4 @@ movaps %xmm3, (%rbx)
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movaps	(%rcx), %xmm1
 # CHECK-NEXT: 2.     1     2.0    2.0    0.0       movaps	(%rdx), %xmm2
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movaps	%xmm3, (%rbx)
+# CHECK-NEXT:        1     2.8    1.0    0.0       <total>

+ 7 - 0
test/tools/llvm-mca/X86/BdVer2/load-throughput.s

@@ -162,6 +162,7 @@ vmovaps (%rbx), %ymm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movb	(%rcx), %bpl
 # CHECK-NEXT: 2.     1     3.0    3.0    0.0       movb	(%rdx), %sil
 # CHECK-NEXT: 3.     1     3.0    3.0    0.0       movb	(%rbx), %dil
+# CHECK-NEXT:        1     2.0    2.0    0.0       <total>
 
 # CHECK:      [1] Code Region
 
@@ -275,6 +276,7 @@ vmovaps (%rbx), %ymm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movw	(%rcx), %bp
 # CHECK-NEXT: 2.     1     3.0    3.0    0.0       movw	(%rdx), %si
 # CHECK-NEXT: 3.     1     3.0    3.0    0.0       movw	(%rbx), %di
+# CHECK-NEXT:        1     2.0    2.0    0.0       <total>
 
 # CHECK:      [2] Code Region
 
@@ -388,6 +390,7 @@ vmovaps (%rbx), %ymm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movl	(%rcx), %ebp
 # CHECK-NEXT: 2.     1     3.0    3.0    0.0       movl	(%rdx), %esi
 # CHECK-NEXT: 3.     1     3.0    3.0    0.0       movl	(%rbx), %edi
+# CHECK-NEXT:        1     2.0    2.0    0.0       <total>
 
 # CHECK:      [3] Code Region
 
@@ -501,6 +504,7 @@ vmovaps (%rbx), %ymm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movq	(%rcx), %rbp
 # CHECK-NEXT: 2.     1     3.0    3.0    0.0       movq	(%rdx), %rsi
 # CHECK-NEXT: 3.     1     3.0    3.0    0.0       movq	(%rbx), %rdi
+# CHECK-NEXT:        1     2.0    2.0    0.0       <total>
 
 # CHECK:      [4] Code Region
 
@@ -615,6 +619,7 @@ vmovaps (%rbx), %ymm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movd	(%rcx), %mm1
 # CHECK-NEXT: 2.     1     4.0    4.0    0.0       movd	(%rdx), %mm2
 # CHECK-NEXT: 3.     1     4.0    4.0    0.0       movd	(%rbx), %mm3
+# CHECK-NEXT:        1     2.5    2.5    0.0       <total>
 
 # CHECK:      [5] Code Region
 
@@ -729,6 +734,7 @@ vmovaps (%rbx), %ymm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       movaps	(%rcx), %xmm1
 # CHECK-NEXT: 2.     1     4.0    4.0    0.0       movaps	(%rdx), %xmm2
 # CHECK-NEXT: 3.     1     4.0    4.0    0.0       movaps	(%rbx), %xmm3
+# CHECK-NEXT:        1     2.5    2.5    0.0       <total>
 
 # CHECK:      [6] Code Region
 
@@ -842,3 +848,4 @@ vmovaps (%rbx), %ymm3
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       vmovaps	(%rcx), %ymm1
 # CHECK-NEXT: 2.     1     3.0    3.0    0.0       vmovaps	(%rdx), %ymm2
 # CHECK-NEXT: 3.     1     3.0    3.0    0.0       vmovaps	(%rbx), %ymm3
+# CHECK-NEXT:        1     2.0    2.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s

@@ -106,3 +106,4 @@ vmovaps %xmm0, 48(%rdi)
 # CHECK-NEXT: 5.     1     9.0    1.0    0.0       vmovaps	%xmm0, 32(%rdi)
 # CHECK-NEXT: 6.     1     3.0    3.0    2.0       vmovaps	48(%rsi), %xmm0
 # CHECK-NEXT: 7.     1     10.0   0.0    0.0       vmovaps	%xmm0, 48(%rdi)
+# CHECK-NEXT:        1     5.3    1.3    0.5       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/one-idioms.s

@@ -165,3 +165,4 @@ vpcmpeqw  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 12.    1     8.0    8.0    0.0       vpcmpeqd	%xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 13.    1     9.0    2.0    0.0       vpcmpeqq	%xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 14.    1     10.0   10.0   0.0       vpcmpeqw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:        1     4.9    3.8    0.2       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s

@@ -46,3 +46,4 @@ add    %ecx, %ebx
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulq	%rax, %rbx
 # CHECK-NEXT: 1.     1     6.0    0.0    0.0       lzcntw	%ax, %bx
 # CHECK-NEXT: 2.     1     8.0    0.0    0.0       addl	%ecx, %ebx
+# CHECK-NEXT:        1     5.0    0.3    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s

@@ -92,3 +92,4 @@ xor %bx, %dx
 # CHECK-NEXT: 0.     3     3.7    0.3    0.0       addw	%cx, %dx
 # CHECK-NEXT: 1.     3     4.3    0.0    0.0       movw	%ax, %dx
 # CHECK-NEXT: 2.     3     5.0    0.0    0.0       xorw	%bx, %dx
+# CHECK-NEXT:        3     4.3    0.1    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s

@@ -92,3 +92,4 @@ add %cx, %bx
 # CHECK-NEXT: 0.     3     6.7    0.7    0.0       imulw	%ax, %bx
 # CHECK-NEXT: 1.     3     9.7    0.0    0.0       lzcntw	%ax, %bx
 # CHECK-NEXT: 2.     3     11.7   0.0    0.0       addw	%cx, %bx
+# CHECK-NEXT:        3     9.3    0.2    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s

@@ -93,3 +93,4 @@ lzcnt 2(%rsp), %cx
 # CHECK-NEXT: 0.     3     7.7    0.3    0.0       imull	%edx, %ecx
 # CHECK-NEXT: 1.     3     7.3    0.0    0.0       lzcntw	(%rsp), %cx
 # CHECK-NEXT: 2.     3     8.7    1.0    0.0       lzcntw	2(%rsp), %cx
+# CHECK-NEXT:        3     7.9    0.4    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s

@@ -45,3 +45,4 @@ add  %ecx, %ebx
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulw	%ax, %cx
 # CHECK-NEXT: 1.     1     5.0    0.0    0.0       addb	%al, %cl
 # CHECK-NEXT: 2.     1     6.0    0.0    0.0       addl	%ecx, %ebx
+# CHECK-NEXT:        1     4.0    0.3    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s

@@ -121,3 +121,4 @@ vsqrtps     %ymm0, %ymm2
 # CHECK-NEXT: 5.     2     3.5    3.5    12.0      vsqrtps	%xmm0, %xmm2
 # CHECK-NEXT: 6.     2     19.5   19.5   0.0       vaddps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 7.     2     7.5    7.5    8.0       vsqrtps	%ymm0, %ymm2
+# CHECK-NEXT:        2     7.9    7.9    6.1       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/pr37790.s

@@ -42,3 +42,4 @@ stmxcsr (%rsp)
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     2     51.5   0.5    0.0       int3
 # CHECK-NEXT: 1.     2     151.0  0.0    0.0       stmxcsr	(%rsp)
+# CHECK-NEXT:        2     101.3  0.3    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/rank.s

@@ -122,3 +122,4 @@ add %ebx, %eax
 # CHECK-NEXT: 5.     3     10.7   1.0    0.0       addl	%edx, %esi
 # CHECK-NEXT: 6.     3     12.0   1.0    0.0       addl	%ebx, %eax
 # CHECK-NEXT: 7.     3     13.0   0.0    0.0       addl	%ebx, %eax
+# CHECK-NEXT:        3     9.9    1.1    0.3       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/read-advance-1.s

@@ -46,3 +46,4 @@ vmulps  (%rdi), %xmm1, %xmm2
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       vaddps	%xmm0, %xmm0, %xmm1
 # CHECK-NEXT: 1.     1     1.0    0.0    0.0       vmulps	(%rdi), %xmm1, %xmm2
+# CHECK-NEXT:        1     1.0    0.5    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/read-advance-2.s

@@ -45,3 +45,4 @@
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imull	%esi
 # CHECK-NEXT: 1.     1     5.0    4.0    0.0       imull	(%rdi)
+# CHECK-NEXT:        1     3.0    2.5    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/read-advance-3.s

@@ -45,3 +45,4 @@
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       addq	%rdi, %rsi
 # CHECK-NEXT: 1.     1     1.0    0.0    0.0       addq	(%rsp), %rsi
 # CHECK-NEXT: 2.     1     3.0    3.0    2.0       addq	%rdx, %r8
+# CHECK-NEXT:        1     1.7    1.3    0.7       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s

@@ -104,3 +104,4 @@ vaddps %xmm1, %xmm1, %xmm2
 # CHECK-NEXT: 0.     3     0.0    0.0    5.3       vxorps	%xmm0, %xmm0, %xmm0
 # CHECK-NEXT: 1.     3     1.7    1.7    3.0       vmovaps	%xmm0, %xmm1
 # CHECK-NEXT: 2.     3     3.3    1.0    0.0       vaddps	%xmm1, %xmm1, %xmm2
+# CHECK-NEXT:        3     1.7    0.9    2.8       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s

@@ -144,3 +144,4 @@ movdqu %xmm5, %xmm0
 # CHECK-NEXT: 6.     3     4.7    0.0    0.0       movupd	%xmm3, %xmm4
 # CHECK-NEXT: 7.     3     5.3    0.0    0.0       movdqa	%xmm4, %xmm5
 # CHECK-NEXT: 8.     3     6.0    0.0    0.0       movdqu	%xmm5, %xmm0
+# CHECK-NEXT:        3     3.0    0.4    1.4       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s

@@ -129,3 +129,4 @@ vmovdqu %xmm5, %xmm0
 # CHECK-NEXT: 4.     3     5.7    0.0    0.0       vmovupd	%xmm3, %xmm4
 # CHECK-NEXT: 5.     3     6.3    0.0    0.0       vmovdqa	%xmm4, %xmm5
 # CHECK-NEXT: 6.     3     7.0    0.0    0.0       vmovdqu	%xmm5, %xmm0
+# CHECK-NEXT:        3     4.5    0.5    0.8       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s

@@ -115,3 +115,4 @@ mov %edx, %eax
 # CHECK-NEXT: 2.     3     4.7    0.0    0.0       movl	%ebx, %ecx
 # CHECK-NEXT: 3.     3     5.3    0.0    0.0       movl	%ecx, %edx
 # CHECK-NEXT: 4.     3     6.0    0.0    0.0       movl	%edx, %eax
+# CHECK-NEXT:        3     4.0    0.8    0.7       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s

@@ -115,3 +115,4 @@ mov %rdx, %rax
 # CHECK-NEXT: 2.     3     4.7    0.0    0.0       movq	%rbx, %rcx
 # CHECK-NEXT: 3.     3     5.3    0.0    0.0       movq	%rcx, %rdx
 # CHECK-NEXT: 4.     3     6.0    0.0    0.0       movq	%rdx, %rax
+# CHECK-NEXT:        3     4.0    0.8    0.7       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/register-files-1.s

@@ -100,3 +100,4 @@ vmulps %xmm0, %xmm0, %xmm0
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     5     20.2   0.2    0.0       vaddps	%xmm0, %xmm0, %xmm0
 # CHECK-NEXT: 1.     5     25.2   0.0    0.0       vmulps	%xmm0, %xmm0, %xmm0
+# CHECK-NEXT:        5     22.7   0.1    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/register-files-2.s

@@ -100,3 +100,4 @@ vmulps %xmm0, %xmm0, %xmm0
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     5     14.0   0.2    0.0       vaddps	%xmm0, %xmm0, %xmm0
 # CHECK-NEXT: 1.     5     15.8   0.0    0.0       vmulps	%xmm0, %xmm0, %xmm0
+# CHECK-NEXT:        5     14.9   0.1    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/register-files-5.s

@@ -151,3 +151,4 @@
 # CHECK-NEXT: 30.    1     24.0   11.0   0.0       vaddps	%ymm3, %ymm0, %ymm4
 # CHECK-NEXT: 31.    1     25.0   12.0   0.0       vaddps	%ymm3, %ymm0, %ymm5
 # CHECK-NEXT: 32.    1     25.0   13.0   0.0       vaddps	%ymm3, %ymm0, %ymm6
+# CHECK-NEXT:        1     15.6   11.2   0.6       <total>

+ 7 - 0
test/tools/llvm-mca/X86/BdVer2/store-throughput.s

@@ -162,6 +162,7 @@ vmovaps %ymm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movb	%bpl, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movb	%sil, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movb	%dil, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>
 
 # CHECK:      [1] Code Region
 
@@ -275,6 +276,7 @@ vmovaps %ymm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movw	%bp, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movw	%si, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movw	%di, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>
 
 # CHECK:      [2] Code Region
 
@@ -388,6 +390,7 @@ vmovaps %ymm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movl	%ebp, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movl	%esi, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movl	%edi, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>
 
 # CHECK:      [3] Code Region
 
@@ -501,6 +504,7 @@ vmovaps %ymm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movq	%rbp, (%rcx)
 # CHECK-NEXT: 2.     1     3.0    0.0    0.0       movq	%rsi, (%rdx)
 # CHECK-NEXT: 3.     1     4.0    0.0    0.0       movq	%rdi, (%rbx)
+# CHECK-NEXT:        1     2.5    0.3    0.0       <total>
 
 # CHECK:      [4] Code Region
 
@@ -616,6 +620,7 @@ vmovaps %ymm3, (%rbx)
 # CHECK-NEXT: 1.     1     3.0    0.0    0.0       movd	%mm1, (%rcx)
 # CHECK-NEXT: 2.     1     5.0    0.0    0.0       movd	%mm2, (%rdx)
 # CHECK-NEXT: 3.     1     7.0    0.0    0.0       movd	%mm3, (%rbx)
+# CHECK-NEXT:        1     4.0    0.3    0.0       <total>
 
 # CHECK:      [5] Code Region
 
@@ -730,6 +735,7 @@ vmovaps %ymm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       movaps	%xmm1, (%rcx)
 # CHECK-NEXT: 2.     1     4.0    1.0    0.0       movaps	%xmm2, (%rdx)
 # CHECK-NEXT: 3.     1     5.0    0.0    0.0       movaps	%xmm3, (%rbx)
+# CHECK-NEXT:        1     3.0    0.5    0.0       <total>
 
 # CHECK:      [6] Code Region
 
@@ -843,3 +849,4 @@ vmovaps %ymm3, (%rbx)
 # CHECK-NEXT: 1.     1     2.0    1.0    0.0       vmovaps	%ymm1, (%rcx)
 # CHECK-NEXT: 2.     1     35.0   33.0   0.0       vmovaps	%ymm2, (%rdx)
 # CHECK-NEXT: 3.     1     36.0   1.0    0.0       vmovaps	%ymm3, (%rbx)
+# CHECK-NEXT:        1     18.5   9.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s

@@ -80,3 +80,4 @@ vbroadcastss (%rax), %ymm0
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     1.0    1.0    2.7       leaq	8(%rsp,%rdi,2), %rax
 # CHECK-NEXT: 1.     3     1.7    0.7    0.0       vbroadcastss	(%rax), %ymm0
+# CHECK-NEXT:        3     1.3    0.8    1.3       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s

@@ -41,3 +41,4 @@ vandps (%rdi), %xmm1, %xmm2
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       vaddps	%xmm0, %xmm0, %xmm1
 # CHECK-NEXT: 1.     1     1.0    0.0    0.0       vandps	(%rdi), %xmm1, %xmm2
+# CHECK-NEXT:        1     1.0    0.5    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s

@@ -41,3 +41,4 @@ vandps (%rdi), %ymm1, %ymm2
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       vaddps	%ymm0, %ymm0, %ymm1
 # CHECK-NEXT: 1.     1     1.0    0.0    0.0       vandps	(%rdi), %ymm1, %ymm2
+# CHECK-NEXT:        1     1.0    0.5    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s

@@ -102,3 +102,4 @@
 # CHECK-NEXT: 3.     2     16.0   0.0    6.0       vaddps	%ymm4, %ymm5, %ymm6
 # CHECK-NEXT: 4.     2     20.0   0.0    4.0       vmulps	%ymm6, %ymm3, %ymm4
 # CHECK-NEXT: 5.     2     25.0   0.0    1.5       vaddps	%ymm4, %ymm5, %ymm0
+# CHECK-NEXT:        2     14.8   0.4    5.3       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s

@@ -102,3 +102,4 @@
 # CHECK-NEXT: 3.     2     8.0    0.0    6.0       vaddps	%ymm4, %ymm5, %ymm6
 # CHECK-NEXT: 4.     2     12.0   0.0    4.0       vmulps	%ymm6, %ymm3, %ymm4
 # CHECK-NEXT: 5.     2     17.0   0.0    1.5       vaddps	%ymm4, %ymm5, %ymm0
+# CHECK-NEXT:        2     8.7    0.3    5.3       <total>

+ 5 - 0
test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s

@@ -120,6 +120,7 @@ vaddps  %ymm1, %ymm1, %ymm0
 # CHECK-NEXT: 0.     3     2.0    2.0    0.0       vaddps	%ymm0, %ymm0, %ymm1
 # CHECK-NEXT: 1.     3     3.0    3.0    1.7       vxorps	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT: 2.     3     4.3    0.0    0.3       vblendps	$2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT:        3     3.1    1.7    0.7       <total>
 
 # CHECK:      [1] Code Region - ZERO-IDIOM-2
 
@@ -205,6 +206,7 @@ vaddps  %ymm1, %ymm1, %ymm0
 # CHECK-NEXT: 0.     3     2.0    2.0    0.0       vaddpd	%ymm0, %ymm0, %ymm1
 # CHECK-NEXT: 1.     3     3.0    3.0    1.7       vxorpd	%ymm1, %ymm1, %ymm1
 # CHECK-NEXT: 2.     3     4.3    0.0    0.3       vblendpd	$2, %ymm1, %ymm2, %ymm3
+# CHECK-NEXT:        3     3.1    1.7    0.7       <total>
 
 # CHECK:      [2] Code Region - ZERO-IDIOM-3
 
@@ -284,6 +286,7 @@ vaddps  %ymm1, %ymm1, %ymm0
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     2.0    2.0    0.0       vaddps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 1.     3     2.0    2.0    3.0       vandnps	%ymm2, %ymm2, %ymm3
+# CHECK-NEXT:        3     2.0    2.0    1.5       <total>
 
 # CHECK:      [3] Code Region - ZERO-IDIOM-4
 
@@ -363,6 +366,7 @@ vaddps  %ymm1, %ymm1, %ymm0
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     2.0    2.0    0.0       vaddps	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 1.     3     2.0    2.0    3.0       vandnps	%ymm2, %ymm2, %ymm3
+# CHECK-NEXT:        3     2.0    2.0    1.5       <total>
 
 # CHECK:      [4] Code Region - ZERO-IDIOM-5
 
@@ -442,3 +446,4 @@ vaddps  %ymm1, %ymm1, %ymm0
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     7.0    0.3    0.0       vperm2f128	$136, %ymm0, %ymm0, %ymm1
 # CHECK-NEXT: 1.     3     9.0    0.0    0.0       vaddps	%ymm1, %ymm1, %ymm0
+# CHECK-NEXT:        3     8.0    0.2    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BdVer2/zero-idioms.s

@@ -450,3 +450,4 @@ vpxor  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 68.    1     0.0    0.0    11.0      vxorps	%xmm4, %xmm4, %xmm5
 # CHECK-NEXT: 69.    1     0.0    0.0    11.0      vxorpd	%xmm1, %xmm1, %xmm3
 # CHECK-NEXT: 70.    1     0.0    0.0    12.0      vpxor	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:        1     1.2    1.2    4.1       <total>

+ 1 - 0
test/tools/llvm-mca/X86/Broadwell/zero-idioms.s

@@ -448,3 +448,4 @@ vpxor  %ymm3, %ymm3, %ymm5
 # CHECK-NEXT: 72.    1     0.0    0.0    3.0       vxorpd	%ymm1, %ymm1, %ymm3
 # CHECK-NEXT: 73.    1     0.0    0.0    3.0       vpxor	%xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 74.    1     0.0    0.0    3.0       vpxor	%ymm3, %ymm3, %ymm5
+# CHECK-NEXT:        1     0.9    0.2    1.8       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/add-sequence.s

@@ -99,3 +99,4 @@ add %eax, %edx
 # CHECK-NEXT: 0.     10    2.5    0.4    0.0       addl	%eax, %ecx
 # CHECK-NEXT: 1.     10    2.1    0.7    0.5       addl	%esi, %eax
 # CHECK-NEXT: 2.     10    2.6    0.0    0.3       addl	%eax, %edx
+# CHECK-NEXT:        10    2.4    0.4    0.3       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-1.s

@@ -99,3 +99,4 @@ add %edx, %eax
 # CHECK-NEXT: 1.     1     2.0    0.0    0.0       addl	%ebx, %ecx
 # CHECK-NEXT: 2.     1     2.0    0.0    0.0       addl	%ecx, %edx
 # CHECK-NEXT: 3.     1     3.0    0.0    0.0       addl	%edx, %eax
+# CHECK-NEXT:        1     2.0    0.3    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s

@@ -124,3 +124,4 @@ vmovaps %xmm0, 48(%rdi)
 # CHECK-NEXT: 5.     1     16.0   0.0    0.0       vmovaps	%xmm0, 32(%rdi)
 # CHECK-NEXT: 6.     1     16.0   0.0    0.0       vmovaps	48(%rsi), %xmm0
 # CHECK-NEXT: 7.     1     21.0   0.0    0.0       vmovaps	%xmm0, 48(%rdi)
+# CHECK-NEXT:        1     11.0   0.1    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/clear-super-register-1.s

@@ -61,3 +61,4 @@ bsf   %rax, %rcx
 # CHECK-NEXT: 1.     2     1.5    1.0    4.5       lzcntl	%ecx, %eax
 # CHECK-NEXT: 2.     2     2.0    0.0    4.5       andq	%rcx, %rax
 # CHECK-NEXT: 3.     2     2.0    0.0    0.5       bsfq	%rax, %rcx
+# CHECK-NEXT:        2     1.8    0.4    2.4       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s

@@ -121,3 +121,4 @@ vandps %xmm4, %xmm1, %xmm0
 # CHECK-NEXT: 15.    2     21.0   21.0   13.5      vaddps	%ymm3, %ymm1, %ymm4
 # CHECK-NEXT: 16.    2     22.0   22.0   12.5      vaddps	%ymm3, %ymm1, %ymm4
 # CHECK-NEXT: 17.    2     24.0   0.0    11.5      vandps	%xmm4, %xmm1, %xmm0
+# CHECK-NEXT:        2     17.5   9.9    21.6      <total>

+ 8 - 0
test/tools/llvm-mca/X86/BtVer2/cmpxchg-read-advance.s

@@ -110,6 +110,7 @@ lock cmpxchg16b (%rsp)
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulq	%rax, %rax
 # CHECK-NEXT: 1.     1     3.0    0.0    0.0       cmpxchgq	%rcx, (%rdx)
+# CHECK-NEXT:        1     2.0    0.5    0.0       <total>
 
 # CHECK:      [1] Code Region
 
@@ -176,6 +177,7 @@ lock cmpxchg16b (%rsp)
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulq	%rcx, %rcx
 # CHECK-NEXT: 1.     1     3.0    0.0    0.0       cmpxchgq	%rcx, (%rdx)
+# CHECK-NEXT:        1     2.0    0.5    0.0       <total>
 
 # CHECK:      [2] Code Region
 
@@ -242,6 +244,7 @@ lock cmpxchg16b (%rsp)
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulq	%rax, %rax
 # CHECK-NEXT: 1.     1     3.0    0.0    0.0       lock		cmpxchgq	%rcx, (%rdx)
+# CHECK-NEXT:        1     2.0    0.5    0.0       <total>
 
 # CHECK:      [3] Code Region
 
@@ -308,6 +311,7 @@ lock cmpxchg16b (%rsp)
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulq	%rcx, %rcx
 # CHECK-NEXT: 1.     1     3.0    0.0    0.0       lock		cmpxchgq	%rcx, (%rdx)
+# CHECK-NEXT:        1     2.0    0.5    0.0       <total>
 
 # CHECK:      [4] Code Region
 
@@ -378,6 +382,7 @@ lock cmpxchg16b (%rsp)
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imull	%eax, %eax
 # CHECK-NEXT: 1.     1     2.0    2.0    0.0       imull	%edx, %edx
 # CHECK-NEXT: 2.     1     1.0    0.0    0.0       cmpxchg8b	(%rsp)
+# CHECK-NEXT:        1     1.3    1.0    0.0       <total>
 
 # CHECK:      [5] Code Region
 
@@ -448,6 +453,7 @@ lock cmpxchg16b (%rsp)
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imull	%eax, %eax
 # CHECK-NEXT: 1.     1     2.0    2.0    0.0       imull	%edx, %edx
 # CHECK-NEXT: 2.     1     1.0    0.0    0.0       cmpxchg16b	(%rsp)
+# CHECK-NEXT:        1     1.3    1.0    0.0       <total>
 
 # CHECK:      [6] Code Region
 
@@ -518,6 +524,7 @@ lock cmpxchg16b (%rsp)
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imull	%ebx, %ebx
 # CHECK-NEXT: 1.     1     2.0    2.0    0.0       imull	%ecx, %ecx
 # CHECK-NEXT: 2.     1     1.0    0.0    0.0       lock		cmpxchg8b	(%rsp)
+# CHECK-NEXT:        1     1.3    1.0    0.0       <total>
 
 # CHECK:      [7] Code Region
 
@@ -588,3 +595,4 @@ lock cmpxchg16b (%rsp)
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imull	%ebx, %ebx
 # CHECK-NEXT: 1.     1     2.0    2.0    0.0       imull	%ecx, %ecx
 # CHECK-NEXT: 2.     1     1.0    0.0    0.0       lock		cmpxchg16b	(%rsp)
+# CHECK-NEXT:        1     1.3    1.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/dependency-breaking-cmp.s

@@ -75,3 +75,4 @@ cmovae %ebx, %eax
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     1.0    1.0    0.0       cmpl	%eax, %eax
 # CHECK-NEXT: 1.     3     2.0    0.0    0.0       cmovael	%ebx, %eax
+# CHECK-NEXT:        3     1.5    0.5    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpeq.s

@@ -90,3 +90,4 @@ vpcmpeqq %xmm3, %xmm3, %xmm0
 # CHECK-NEXT: 1.     3     1.0    1.0    0.0       vpcmpeqw	%xmm1, %xmm1, %xmm2
 # CHECK-NEXT: 2.     3     1.0    1.0    0.0       vpcmpeqd	%xmm2, %xmm2, %xmm3
 # CHECK-NEXT: 3.     3     1.0    1.0    0.0       vpcmpeqq	%xmm3, %xmm3, %xmm0
+# CHECK-NEXT:        3     1.0    1.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/dependency-breaking-pcmpgt.s

@@ -91,3 +91,4 @@ vpcmpgtq %xmm3, %xmm3, %xmm0
 # CHECK-NEXT: 1.     3     0.0    0.0    0.0       vpcmpgtw	%xmm1, %xmm1, %xmm2
 # CHECK-NEXT: 2.     3     0.0    0.0    0.0       vpcmpgtd	%xmm2, %xmm2, %xmm3
 # CHECK-NEXT: 3.     3     0.0    0.0    0.0       vpcmpgtq	%xmm3, %xmm3, %xmm0
+# CHECK-NEXT:        3     0.0    0.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-1.s

@@ -76,3 +76,4 @@ sbb %eax, %eax
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     3     2.0    0.3    0.0       sbbl	%edx, %edx
 # CHECK-NEXT: 1.     3     3.0    0.0    0.0       sbbl	%eax, %eax
+# CHECK-NEXT:        3     2.5    0.2    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/dependency-breaking-sbb-2.s

@@ -84,3 +84,4 @@ sbb %eax, %eax
 # CHECK-NEXT: 0.     3     2.3    1.0    0.0       imull	%edx, %eax
 # CHECK-NEXT: 1.     3     1.3    1.0    2.7       addl	%edx, %edx
 # CHECK-NEXT: 2.     3     1.7    0.0    2.7       sbbl	%eax, %eax
+# CHECK-NEXT:        3     1.8    0.7    1.8       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/dependent-pmuld-paddd.s

@@ -99,3 +99,4 @@ vpaddd %xmm0, %xmm0, %xmm3
 # CHECK-NEXT: 0.     10    8.0    0.1    0.0       vpmuldq	%xmm0, %xmm0, %xmm1
 # CHECK-NEXT: 1.     10    9.5    0.0    0.0       vpaddd	%xmm1, %xmm1, %xmm0
 # CHECK-NEXT: 2.     10    10.0   0.0    0.0       vpaddd	%xmm0, %xmm0, %xmm3
+# CHECK-NEXT:        10    9.2    0.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/dot-product.s

@@ -78,3 +78,4 @@ vhaddps  %xmm3, %xmm3, %xmm4
 # CHECK-NEXT: 0.     3     1.0    1.0    4.7       vmulps	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1.     3     2.7    0.0    2.3       vhaddps	%xmm2, %xmm2, %xmm3
 # CHECK-NEXT: 2.     3     6.0    0.0    0.0       vhaddps	%xmm3, %xmm3, %xmm4
+# CHECK-NEXT:        3     3.2    0.3    2.3       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-1.s

@@ -42,3 +42,4 @@ vhaddps (%rdi), %xmm1, %xmm2
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       vshufps	$0, %xmm0, %xmm1, %xmm1
 # CHECK-NEXT: 1.     1     1.0    0.0    0.0       vhaddps	(%rdi), %xmm1, %xmm2
+# CHECK-NEXT:        1     1.0    0.5    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/hadd-read-after-ld-2.s

@@ -42,3 +42,4 @@ vhaddps (%rdi), %ymm1, %ymm2
 # CHECK:            [0]    [1]    [2]    [3]
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       vshufps	$0, %xmm0, %xmm1, %xmm1
 # CHECK-NEXT: 1.     1     1.0    1.0    0.0       vhaddps	(%rdi), %ymm1, %ymm2
+# CHECK-NEXT:        1     1.0    1.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/int-to-fpu-forwarding-3.s

@@ -80,3 +80,4 @@ vpinsrb $1, %eax, %xmm0, %xmm0
 # CHECK-NEXT: 0.     3     1.0    1.0    3.3       addl	%eax, %eax
 # CHECK-NEXT: 1.     3     7.0    0.0    0.0       vpinsrb	$0, %eax, %xmm0, %xmm0
 # CHECK-NEXT: 2.     3     7.0    0.0    0.0       vpinsrb	$1, %eax, %xmm0, %xmm0
+# CHECK-NEXT:        3     5.0    0.3    1.1       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/load-store-alias.s

@@ -97,3 +97,4 @@ vmovaps %xmm0, 48(%rdi)
 # CHECK-NEXT: 5.     1     16.0   0.0    0.0       vmovaps	%xmm0, 32(%rdi)
 # CHECK-NEXT: 6.     1     16.0   0.0    0.0       vmovaps	48(%rsi), %xmm0
 # CHECK-NEXT: 7.     1     21.0   0.0    0.0       vmovaps	%xmm0, 48(%rdi)
+# CHECK-NEXT:        1     11.0   0.1    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/memcpy-like-test.s

@@ -97,3 +97,4 @@ vmovaps %xmm0, 48(%rdi)
 # CHECK-NEXT: 5.     1     6.0    0.0    0.0       vmovaps	%xmm0, 32(%rdi)
 # CHECK-NEXT: 6.     1     1.0    1.0    0.0       vmovaps	48(%rsi), %xmm0
 # CHECK-NEXT: 7.     1     6.0    0.0    0.0       vmovaps	%xmm0, 48(%rdi)
+# CHECK-NEXT:        1     3.5    0.5    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/one-idioms.s

@@ -156,3 +156,4 @@ vpcmpeqw  %xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 12.    1     1.0    1.0    0.0       vpcmpeqd	%xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 13.    1     1.0    1.0    0.0       vpcmpeqq	%xmm3, %xmm3, %xmm5
 # CHECK-NEXT: 14.    1     1.0    1.0    0.0       vpcmpeqw	%xmm3, %xmm3, %xmm5
+# CHECK-NEXT:        1     1.0    1.0    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/partial-reg-update-2.s

@@ -46,3 +46,4 @@ add    %ecx, %ebx
 # CHECK-NEXT: 0.     1     1.0    1.0    0.0       imulq	%rax, %rbx
 # CHECK-NEXT: 1.     1     7.0    0.0    0.0       lzcntw	%ax, %bx
 # CHECK-NEXT: 2.     1     7.0    0.0    0.0       addl	%ecx, %ebx
+# CHECK-NEXT:        1     5.0    0.3    0.0       <total>

+ 1 - 0
test/tools/llvm-mca/X86/BtVer2/partial-reg-update-3.s

@@ -83,3 +83,4 @@ xor %bx, %dx
 # CHECK-NEXT: 0.     3     2.7    0.3    0.0       addw	%cx, %dx
 # CHECK-NEXT: 1.     3     3.3    0.0    0.0       movw	%ax, %dx
 # CHECK-NEXT: 2.     3     3.7    0.0    0.0       xorw	%bx, %dx
+# CHECK-NEXT:        3     3.2    0.1    0.0       <total>

部分文件因文件數量過多而無法顯示