Browse Source

[utils] De-duplicate utils/update_{llc_,}test_checks.py

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D42654

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323718 91177308-0d34-0410-b5e6-96231b3b80d8
Fangrui Song 7 years ago
parent
commit
6ceb7d83e6

+ 0 - 0
utils/UpdateTestChecks/__init__.py


+ 199 - 0
utils/UpdateTestChecks/asm.py

@@ -0,0 +1,199 @@
+import re
+import string
+
+from . import common
+
+# RegEx: this is where the magic happens.
+
+# Each ASM_FUNCTION_*_RE below exposes two named groups: 'func' (the label
+# name of the function) and 'body' (the text between the target-specific start
+# and end markers). All of them are compiled with re.M | re.S, so '^' anchors
+# at every line start and '.' also matches newlines.
+# NOTE(review): several patterns write '.Lfunc_end' / '.cfi_startproc' with an
+# unescaped leading '.', which matches any character, not only a literal dot.
+#
+# x86: a 'func:' label followed by a '# @func' comment. The body starts at a
+# line beginning with '#' or '##' plus whitespace (with a ':' further on) and
+# stops before one of the listed end markers: a label followed by '.size',
+# '.cfi_endproc', '.globl', '.comm', '.section' / '.subsection' or
+# '# -- End function'.
+ASM_FUNCTION_X86_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
+    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
+    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section|#+ -- End function)',
+    flags=(re.M | re.S))
+
+# ARM: a 'func:' label on its own line followed by a '.fnstart' directive; the
+# body runs up to the '.Lfunc_end<N>:' label.
+ASM_FUNCTION_ARM_RE = re.compile(
+        r'^(?P<func>[0-9a-zA-Z_]+):\n' # f: (name of function)
+        r'\s+\.fnstart\n' # .fnstart
+        r'(?P<body>.*?)\n' # (body of the function)
+        r'.Lfunc_end[0-9]+:', # .Lfunc_end0: or # -- End function
+        flags=(re.M | re.S))
+
+# AArch64: a 'func:' label with a '// @func' comment, then '.cfi_startproc';
+# the body runs up to the '.Lfunc_end<N>:' label.
+ASM_FUNCTION_AARCH64_RE = re.compile(
+     r'^_?(?P<func>[^:]+):[ \t]*\/\/[ \t]*@(?P=func)\n'
+     r'[ \t]+.cfi_startproc\n'
+     r'(?P<body>.*?)\n'
+     # This list is incomplete
+     r'.Lfunc_end[0-9]+:\n',
+     flags=(re.M | re.S))
+
+# MIPS: a 'func:' label with a '# @func' comment, one or more prologue
+# directives (.frame, .mask, .fmask, .set), the body, one or more epilogue
+# directives (.set, .end) and finally a '$func_end<N>:' or '.Lfunc_end<N>:'
+# label.
+ASM_FUNCTION_MIPS_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' # f: (name of func)
+    r'(?:^[ \t]+\.(frame|f?mask|set).*?\n)+'  # Mips+LLVM standard asm prologue
+    r'(?P<body>.*?)\n'                        # (body of the function)
+    r'(?:^[ \t]+\.(set|end).*?\n)+'           # Mips+LLVM standard asm epilogue
+    r'(\$|\.L)func_end[0-9]+:\n',             # $func_end0: (mips32 - O32) or
+                                              # .Lfunc_end0: (mips64 - NewABI)
+    flags=(re.M | re.S))
+
+# PowerPC: a 'func:' label with a '# @func' comment and a '.Lfunc_begin<N>:'
+# label, an optional '.cfi_startproc', any number of '.Lfunc_gep<N>:' /
+# '.Lfunc_lep<N>:' labels with their indented lines, then the body. Trailing
+# '.long' / '.quad' lines before '.Lfunc_end<N>:' are left out of the body.
+ASM_FUNCTION_PPC_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
+    r'\.Lfunc_begin[0-9]+:\n'
+    r'(?:[ \t]+.cfi_startproc\n)?'
+    r'(?:\.Lfunc_[gl]ep[0-9]+:\n(?:[ \t]+.*?\n)*)*'
+    r'(?P<body>.*?)\n'
+    # This list is incomplete
+    r'(?:^[ \t]*(?:\.long[ \t]+[^\n]+|\.quad[ \t]+[^\n]+)\n)*'
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
+# RISC-V: same header and body start as the x86 pattern, but the body is
+# terminated by the '.Lfunc_end<N>:' label.
+ASM_FUNCTION_RISCV_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
+    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
+# SystemZ: a 'func:' label with a '# @func' comment, then '.cfi_startproc';
+# the body runs up to the '.Lfunc_end<N>:' label.
+ASM_FUNCTION_SYSTEMZ_RE = re.compile(
+    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
+    r'[ \t]+.cfi_startproc\n'
+    r'(?P<body>.*?)\n'
+    r'.Lfunc_end[0-9]+:\n',
+    flags=(re.M | re.S))
+
+
+# Loop annotations: '# =>This Inner Loop Header: ...' and '# in Loop: ...'.
+# NOTE(review): common.py defines a constant with the same name and pattern.
+SCRUB_LOOP_COMMENT_RE = re.compile(
+    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
+
+# An instruction line whose trailing '#' comment has the form '<dst> = ...',
+# where <dst> is an xmm/ymm/zmm register or 'mem', optionally followed by a
+# '{%kN}' mask and '{z}'. Group 1 is the mnemonic with its leading whitespace,
+# group 2 is the comment text starting at <dst>.
+SCRUB_X86_SHUFFLES_RE = (
+    re.compile(
+        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
+        flags=re.M))
+# A numeric displacement off the stack pointer, e.g. '8(%rsp)'; group 1 is the
+# register name without the '%'.
+SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
+# A RIP-relative operand, e.g. 'foo(%rip)'.
+SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
+# A label of the form '.LCPI<N>_<M>'.
+SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
+# 'retl' or 'retq'.
+# NOTE(review): '|' inside a character class is a literal, so this also
+# matches 'ret|'; presumably '[lq]' was intended.
+SCRUB_X86_RET_RE = re.compile(r'ret[l|q]')
+
+def scrub_asm_x86(asm, args):
+  """Scrub x86 assembly text and generalize operands into FileCheck patterns.
+
+  Args:
+    asm: assembly text of one function body.
+    args: parsed command-line options; only args.x86_extra_scrub is read.
+
+  Returns:
+    The scrubbed assembly text.
+  """
+  # Scrub runs of whitespace out of the assembly, but leave the leading
+  # whitespace in place.
+  asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand the tabs used for indentation. The str method is used because the
+  # module-level string.expandtabs() function only exists in Python 2.
+  asm = asm.expandtabs(2)
+  # Detect shuffle asm comments and hide the operands in favor of the comments.
+  asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
+  # Generically match the stack offset of a memory operand.
+  asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
+  # Generically match a RIP-relative memory operand.
+  asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
+  # Generically match a LCP symbol.
+  asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
+  if args.x86_extra_scrub:
+    # Avoid generating different checks for 32- and 64-bit because of 'retl' vs 'retq'.
+    asm = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm)
+  # Strip kill operands inserted into the asm.
+  asm = common.SCRUB_KILL_COMMENT_RE.sub('', asm)
+  # Strip trailing whitespace.
+  asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
+  return asm
+
+def scrub_asm_arm_eabi(asm, args):
+  """Scrub ARM / AArch64 assembly text.
+
+  Args:
+    asm: assembly text of one function body.
+    args: unused; accepted so all scrubbers share one signature.
+
+  Returns:
+    The scrubbed assembly text.
+  """
+  # Scrub runs of whitespace out of the assembly, but leave the leading
+  # whitespace in place.
+  asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand the tabs used for indentation. The str method is used because the
+  # module-level string.expandtabs() function only exists in Python 2.
+  asm = asm.expandtabs(2)
+  # Strip kill operands inserted into the asm.
+  asm = common.SCRUB_KILL_COMMENT_RE.sub('', asm)
+  # Strip trailing whitespace.
+  asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
+  return asm
+
+def scrub_asm_powerpc64(asm, args):
+  """Scrub PowerPC64 assembly text, dropping loop annotation comments.
+
+  Args:
+    asm: assembly text of one function body.
+    args: unused; accepted so all scrubbers share one signature.
+
+  Returns:
+    The scrubbed assembly text.
+  """
+  # Scrub runs of whitespace out of the assembly, but leave the leading
+  # whitespace in place.
+  asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand the tabs used for indentation. The str method is used because the
+  # module-level string.expandtabs() function only exists in Python 2.
+  asm = asm.expandtabs(2)
+  # Strip unimportant comments.
+  asm = SCRUB_LOOP_COMMENT_RE.sub(r'', asm)
+  # Strip trailing whitespace.
+  asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
+  return asm
+
+def _scrub_asm_whitespace(asm):
+  """Apply the whitespace-only scrubbing shared by MIPS, RISC-V and SystemZ."""
+  # Scrub runs of whitespace out of the assembly, but leave the leading
+  # whitespace in place.
+  asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm)
+  # Expand the tabs used for indentation. The str method is used because the
+  # module-level string.expandtabs() function only exists in Python 2.
+  asm = asm.expandtabs(2)
+  # Strip trailing whitespace.
+  return common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
+
+def scrub_asm_mips(asm, args):
+  """Scrub MIPS assembly text; args is unused."""
+  return _scrub_asm_whitespace(asm)
+
+def scrub_asm_riscv(asm, args):
+  """Scrub RISC-V assembly text; args is unused."""
+  return _scrub_asm_whitespace(asm)
+
+def scrub_asm_systemz(asm, args):
+  """Scrub SystemZ assembly text; args is unused."""
+  return _scrub_asm_whitespace(asm)
+
+
+def build_function_body_dictionary_for_triple(args, raw_tool_output, triple, prefixes, func_dict):
+  """Pick the scrubber and function regex for a triple and fill func_dict.
+
+  Args:
+    args: parsed command-line options; passed to the scrubber, and
+      args.verbose is forwarded to common.build_function_body_dictionary.
+    raw_tool_output: tool output to search for functions.
+    triple: target triple; matched by prefix against the table below.
+    prefixes: check prefixes, forwarded unchanged.
+    func_dict: per-prefix dictionary of function bodies, forwarded unchanged.
+
+  Raises:
+    KeyError: if no known prefix matches the triple.
+  """
+  x86 = (scrub_asm_x86, ASM_FUNCTION_X86_RE)
+  aarch64 = (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE)
+  arm = (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE)
+  mips = (scrub_asm_mips, ASM_FUNCTION_MIPS_RE)
+  ppc = (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE)
+  riscv = (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE)
+  systemz = (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE)
+  # Triple prefixes grouped by the handler pair they select. Prefixes that
+  # overlap (e.g. 'x86' and 'x86_64') always share a group, so the order in
+  # which they are tried does not affect the outcome.
+  handler_groups = [
+      (('x86_64', 'i686', 'x86', 'i386'), x86),
+      (('aarch64',), aarch64),
+      (('arm-eabi', 'thumb-eabi',
+        'thumbv6', 'thumbv6-eabi', 'thumbv6t2', 'thumbv6t2-eabi',
+        'thumbv6m', 'thumbv6m-eabi',
+        'thumbv7', 'thumbv7-eabi', 'thumbv7m', 'thumbv7m-eabi',
+        'thumbv8-eabi', 'thumbv8m.base', 'thumbv8m.main',
+        'armv6', 'armv7', 'armv7-eabi',
+        'armeb-eabi', 'armv7eb-eabi', 'armv7eb'), arm),
+      (('mips',), mips),
+      (('powerpc64', 'powerpc64le'), ppc),
+      (('riscv32', 'riscv64'), riscv),
+      (('s390x',), systemz),
+  ]
+  for triple_prefixes, (scrubber, function_re) in handler_groups:
+    # str.startswith accepts a tuple and succeeds if any entry is a prefix.
+    if triple.startswith(triple_prefixes):
+      break
+  else:
+    raise KeyError('Triple %r is not supported' % (triple))
+
+  common.build_function_body_dictionary(
+          function_re, scrubber, [args], raw_tool_output, prefixes,
+          func_dict, args.verbose)

+ 65 - 0
utils/UpdateTestChecks/common.py

@@ -0,0 +1,65 @@
+import re
+import subprocess
+import sys
+
+# Patterns for picking apart the lines of a test file. All of them are raw
+# strings so that regex escapes such as '\s' are not treated as (invalid)
+# string escape sequences.
+# A '; RUN: <command>' line; group 1 is the command.
+RUN_LINE_RE = re.compile(r'^\s*;\s*RUN:\s*(.*)$')
+# '-check-prefix=' / '--check-prefixes=' option; group 1 is its value.
+CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?=(\S+)')
+# A '; <PREFIX>[-NEXT|-NOT|-DAG|-LABEL]:' line; group 1 is the prefix.
+CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
+
+# An IR 'define ... @name(' line; group 1 is the function name.
+IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
+# 'target triple = "<triple>"' in IR; group 1 is the triple.
+TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$')
+# '-mtriple=<triple>' on a command line; group 1 is the triple.
+TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)')
+
+# Whitespace at the very start of the string.
+SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
+# Runs of spaces/tabs, except where the lookahead sees a line start that is
+# either immediately at this position or followed by two spaces and a word
+# character.
+SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
+# Spaces/tabs at the end of any line.
+SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
+# A '# kill:' comment line. Compiled without re.M, so '^' only anchors at the
+# start of the string.
+SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
+# Loop annotations: '# =>This Inner Loop Header: ...' and '# in Loop: ...'.
+SCRUB_LOOP_COMMENT_RE = re.compile(
+    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
+
+def should_add_line_to_output(input_line, prefix_set):
+  """Decide whether a line of the function body is copied to the output.
+
+  Args:
+    input_line: one line of the input test file.
+    prefix_set: check prefixes whose existing CHECK lines get regenerated.
+
+  Returns:
+    False for a bare ';' comment line and for a CHECK line whose prefix is in
+    prefix_set; True for everything else (blank lines are not skipped).
+  """
+  # A comment line with nothing after the ';' is dropped.
+  is_blank_comment = input_line.strip() == ';'
+  if is_blank_comment:
+    return False
+  # Existing CHECK lines for our prefixes are dropped; we build our own.
+  check = CHECK_RE.match(input_line)
+  return not (check and check.group(1) in prefix_set)
+
+# Invoke the tool that is being tested.
+def invoke_tool(exe, cmd_args, ir):
+  """Run `exe cmd_args` through the shell with the file `ir` as its stdin.
+
+  Args:
+    exe: the tool to run, as it should appear on the command line.
+    cmd_args: the remaining command line, as a single string.
+    ir: path of the file that is opened and fed to the tool's stdin.
+
+  Returns:
+    The tool's stdout with '\r\n' replaced by '\n'.
+  """
+  command = exe + ' ' + cmd_args
+  with open(ir) as ir_file:
+    raw_output = subprocess.check_output(command, shell=True, stdin=ir_file)
+  # Normalize Windows CRLF line endings to Unix LF.
+  return raw_output.replace('\r\n', '\n')
+
+# Build up a dictionary of all the function bodies.
+def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose):
+  """Record the scrubbed body of every function found in the tool output.
+
+  Args:
+    function_re: compiled regex with named groups 'func' and 'body'.
+    scrubber: callable invoked as scrubber(body, *scrubber_args); its result
+      is what gets stored.
+    scrubber_args: extra positional arguments forwarded to scrubber.
+    raw_tool_output: text that is searched with function_re.
+    prefixes: sequence of check prefixes; on a conflict the last one only
+      triggers a warning, while any other prefix has its entry set to None.
+    func_dict: dict mapping each prefix to a dict of function name -> body;
+      updated in place.
+    verbose: when true, echo each function and its scrubbed body to stderr.
+  """
+  # finditer() only yields match objects, so no truthiness check is needed.
+  for m in function_re.finditer(raw_tool_output):
+    func = m.group('func')
+    scrubbed_body = scrubber(m.group('body'), *scrubber_args)
+    if func.startswith('stress'):
+      # We only use the last line of the function body for stress tests.
+      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
+    if verbose:
+      sys.stderr.write('Processing function: ' + func + '\n')
+      for l in scrubbed_body.splitlines():
+        sys.stderr.write('  ' + l + '\n')
+    for prefix in prefixes:
+      bodies = func_dict[prefix]
+      if func in bodies and bodies[func] != scrubbed_body:
+        if prefix == prefixes[-1]:
+          # Warn, then fall through so the new body overwrites the old one.
+          sys.stderr.write('WARNING: Found conflicting asm under the '
+                           'same prefix: %r!\n' % (prefix,))
+        else:
+          # Mark the function as conflicting for this prefix.
+          bodies[func] = None
+          continue
+
+      bodies[func] = scrubbed_body

+ 16 - 263
utils/update_llc_test_checks.py

@@ -2,7 +2,7 @@
 
 
 """A test case update script.
 """A test case update script.
 
 
-This script is a utility to update LLVM X86 'llc' based test cases with new
+This script is a utility to update LLVM 'llc' based test cases with new
 FileCheck patterns. It can either update all of the tests in the file or
 FileCheck patterns. It can either update all of the tests in the file or
 a single test function.
 a single test function.
 """
 """
@@ -14,241 +14,9 @@ import subprocess
 import sys
 import sys
 import re
 import re
 
 
-# Invoke the tool that is being tested.
-def llc(args, cmd_args, ir):
-  with open(ir) as ir_file:
-    stdout = subprocess.check_output(args.llc_binary + ' ' + cmd_args,
-                                     shell=True, stdin=ir_file)
-  # Fix line endings to unix CR style.
-  stdout = stdout.replace('\r\n', '\n')
-  return stdout
-
-
-# RegEx: this is where the magic happens.
-
-ASM_FUNCTION_X86_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
-    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
-    r'^\s*(?:[^:\n]+?:\s*\n\s*\.size|\.cfi_endproc|\.globl|\.comm|\.(?:sub)?section|#+ -- End function)',
-    flags=(re.M | re.S))
-
-ASM_FUNCTION_ARM_RE = re.compile(
-        r'^(?P<func>[0-9a-zA-Z_]+):\n' # f: (name of function)
-        r'\s+\.fnstart\n' # .fnstart
-        r'(?P<body>.*?)\n' # (body of the function)
-        r'.Lfunc_end[0-9]+:', # .Lfunc_end0: or # -- End function
-        flags=(re.M | re.S))
-
-ASM_FUNCTION_AARCH64_RE = re.compile(
-     r'^_?(?P<func>[^:]+):[ \t]*\/\/[ \t]*@(?P=func)\n'
-     r'[ \t]+.cfi_startproc\n'
-     r'(?P<body>.*?)\n'
-     # This list is incomplete
-     r'.Lfunc_end[0-9]+:\n',
-     flags=(re.M | re.S))
-
-ASM_FUNCTION_MIPS_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?' # f: (name of func)
-    r'(?:^[ \t]+\.(frame|f?mask|set).*?\n)+'  # Mips+LLVM standard asm prologue
-    r'(?P<body>.*?)\n'                        # (body of the function)
-    r'(?:^[ \t]+\.(set|end).*?\n)+'           # Mips+LLVM standard asm epilogue
-    r'(\$|\.L)func_end[0-9]+:\n',             # $func_end0: (mips32 - O32) or
-                                              # .Lfunc_end0: (mips64 - NewABI)
-    flags=(re.M | re.S))
-
-ASM_FUNCTION_PPC_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
-    r'\.Lfunc_begin[0-9]+:\n'
-    r'(?:[ \t]+.cfi_startproc\n)?'
-    r'(?:\.Lfunc_[gl]ep[0-9]+:\n(?:[ \t]+.*?\n)*)*'
-    r'(?P<body>.*?)\n'
-    # This list is incomplete
-    r'(?:^[ \t]*(?:\.long[ \t]+[^\n]+|\.quad[ \t]+[^\n]+)\n)*'
-    r'.Lfunc_end[0-9]+:\n',
-    flags=(re.M | re.S))
-
-ASM_FUNCTION_RISCV_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n[^:]*?'
-    r'(?P<body>^##?[ \t]+[^:]+:.*?)\s*'
-    r'.Lfunc_end[0-9]+:\n',
-    flags=(re.M | re.S))
-
-ASM_FUNCTION_SYSTEMZ_RE = re.compile(
-    r'^_?(?P<func>[^:]+):[ \t]*#+[ \t]*@(?P=func)\n'
-    r'[ \t]+.cfi_startproc\n'
-    r'(?P<body>.*?)\n'
-    r'.Lfunc_end[0-9]+:\n',
-    flags=(re.M | re.S))
-
-
-SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
-SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
-SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
-SCRUB_LOOP_COMMENT_RE = re.compile(
-    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
-
-SCRUB_X86_SHUFFLES_RE = (
-    re.compile(
-        r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$',
-        flags=re.M))
-SCRUB_X86_SP_RE = re.compile(r'\d+\(%(esp|rsp)\)')
-SCRUB_X86_RIP_RE = re.compile(r'[.\w]+\(%rip\)')
-SCRUB_X86_LCP_RE = re.compile(r'\.LCPI[0-9]+_[0-9]+')
-SCRUB_X86_RET_RE = re.compile(r'ret[l|q]')
-
-RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
-TRIPLE_ARG_RE = re.compile(r'-mtriple=([^ ]+)')
-TRIPLE_IR_RE = re.compile(r'^target\s+triple\s*=\s*"([^"]+)"$')
-IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
-CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
-CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
-
-def scrub_asm_x86(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Detect shuffle asm comments and hide the operands in favor of the comments.
-  asm = SCRUB_X86_SHUFFLES_RE.sub(r'\1 {{.*#+}} \2', asm)
-  # Generically match the stack offset of a memory operand.
-  asm = SCRUB_X86_SP_RE.sub(r'{{[0-9]+}}(%\1)', asm)
-  # Generically match a RIP-relative memory operand.
-  asm = SCRUB_X86_RIP_RE.sub(r'{{.*}}(%rip)', asm)
-  # Generically match a LCP symbol.
-  asm = SCRUB_X86_LCP_RE.sub(r'{{\.LCPI.*}}', asm)
-  if args.x86_extra_scrub:
-    # Avoid generating different checks for 32- and 64-bit because of 'retl' vs 'retq'.
-    asm = SCRUB_X86_RET_RE.sub(r'ret{{[l|q]}}', asm)
-  # Strip kill operands inserted into the asm.
-  asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_arm_eabi(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Strip kill operands inserted into the asm.
-  asm = SCRUB_KILL_COMMENT_RE.sub('', asm)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_powerpc64(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Stripe unimportant comments
-  asm = SCRUB_LOOP_COMMENT_RE.sub(r'', asm)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_mips(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_riscv(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-def scrub_asm_systemz(asm, args):
-  # Scrub runs of whitespace out of the assembly, but leave the leading
-  # whitespace in place.
-  asm = SCRUB_WHITESPACE_RE.sub(r' ', asm)
-  # Expand the tabs used for indentation.
-  asm = string.expandtabs(asm, 2)
-  # Strip trailing whitespace.
-  asm = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm)
-  return asm
-
-
-# Build up a dictionary of all the function bodies.
-def build_function_body_dictionary(raw_tool_output, triple, prefixes, func_dict,
-                                   args):
-  target_handlers = {
-      'x86_64': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
-      'i686': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
-      'x86': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
-      'i386': (scrub_asm_x86, ASM_FUNCTION_X86_RE),
-      'aarch64': (scrub_asm_arm_eabi, ASM_FUNCTION_AARCH64_RE),
-      'arm-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6t2': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6t2-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6m': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv6m-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv7': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv7-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv7m': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv7m-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv8-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv8m.base': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'thumbv8m.main': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv6': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv7': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv7-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armeb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv7eb-eabi': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'armv7eb': (scrub_asm_arm_eabi, ASM_FUNCTION_ARM_RE),
-      'mips': (scrub_asm_mips, ASM_FUNCTION_MIPS_RE),
-      'powerpc64': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE),
-      'powerpc64le': (scrub_asm_powerpc64, ASM_FUNCTION_PPC_RE),
-      'riscv32': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
-      'riscv64': (scrub_asm_riscv, ASM_FUNCTION_RISCV_RE),
-      's390x': (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE),
-  }
-  handlers = None
-  for prefix, s in target_handlers.items():
-    if triple.startswith(prefix):
-      handlers = s
-      break
-  else:
-    raise KeyError('Triple %r is not supported' % (triple))
-
-  scrubber, function_re = handlers
-  for m in function_re.finditer(raw_tool_output):
-    if not m:
-      continue
-    func = m.group('func')
-    scrubbed_body = scrubber(m.group('body'), args)
-    if func.startswith('stress'):
-      # We only use the last line of the function body for stress tests.
-      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
-    if args.verbose:
-      print >>sys.stderr, 'Processing function: ' + func
-      for l in scrubbed_body.splitlines():
-        print >>sys.stderr, '  ' + l
-    for prefix in prefixes:
-      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
-        if prefix == prefixes[-1]:
-          print >>sys.stderr, ('WARNING: Found conflicting asm under the '
-                               'same prefix: %r!' % (prefix,))
-        else:
-          func_dict[prefix][func] = None
-          continue
+from UpdateTestChecks import asm, common
 
 
-      func_dict[prefix][func] = scrubbed_body
+ADVERT = '; NOTE: Assertions have been autogenerated by '
 
 
 
 
 def add_checks(output_lines, run_list, func_dict, func_name):
 def add_checks(output_lines, run_list, func_dict, func_name):
@@ -275,21 +43,6 @@ def add_checks(output_lines, run_list, func_dict, func_name):
   return output_lines
   return output_lines
 
 
 
 
-def should_add_line_to_output(input_line, prefix_set):
-  # Skip any blank comment lines in the IR.
-  if input_line.strip() == ';':
-    return False
-  # Skip any blank lines in the IR.
-  #if input_line.strip() == '':
-  #  return False
-  # And skip any CHECK lines. We're building our own.
-  m = CHECK_RE.match(input_line)
-  if m and m.group(1) in prefix_set:
-    return False
-
-  return True
-
-
 def main():
 def main():
   parser = argparse.ArgumentParser(description=__doc__)
   parser = argparse.ArgumentParser(description=__doc__)
   parser.add_argument('-v', '--verbose', action='store_true',
   parser.add_argument('-v', '--verbose', action='store_true',
@@ -304,8 +57,7 @@ def main():
   parser.add_argument('tests', nargs='+')
   parser.add_argument('tests', nargs='+')
   args = parser.parse_args()
   args = parser.parse_args()
 
 
-  autogenerated_note = ('; NOTE: Assertions have been autogenerated by '
-                        'utils/' + os.path.basename(__file__))
+  autogenerated_note = (ADVERT + 'utils/' + os.path.basename(__file__))
 
 
   for test in args.tests:
   for test in args.tests:
     if args.verbose:
     if args.verbose:
@@ -315,13 +67,13 @@ def main():
 
 
     triple_in_ir = None
     triple_in_ir = None
     for l in input_lines:
     for l in input_lines:
-      m = TRIPLE_IR_RE.match(l)
+      m = common.TRIPLE_IR_RE.match(l)
       if m:
       if m:
         triple_in_ir = m.groups()[0]
         triple_in_ir = m.groups()[0]
         break
         break
 
 
     raw_lines = [m.group(1)
     raw_lines = [m.group(1)
-                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
+                 for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m]
     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
     for l in raw_lines[1:]:
     for l in raw_lines[1:]:
       if run_lines[-1].endswith("\\"):
       if run_lines[-1].endswith("\\"):
@@ -340,7 +92,7 @@ def main():
       llc_cmd = commands[0]
       llc_cmd = commands[0]
 
 
       triple_in_cmd = None
       triple_in_cmd = None
-      m = TRIPLE_ARG_RE.search(llc_cmd)
+      m = common.TRIPLE_ARG_RE.search(llc_cmd)
       if m:
       if m:
         triple_in_cmd = m.groups()[0]
         triple_in_cmd = m.groups()[0]
 
 
@@ -358,7 +110,7 @@ def main():
       llc_cmd_args = llc_cmd[len('llc'):].strip()
       llc_cmd_args = llc_cmd[len('llc'):].strip()
       llc_cmd_args = llc_cmd_args.replace('< %s', '').replace('%s', '').strip()
       llc_cmd_args = llc_cmd_args.replace('< %s', '').replace('%s', '').strip()
 
 
-      check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
+      check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                                for item in m.group(1).split(',')]
                                for item in m.group(1).split(',')]
       if not check_prefixes:
       if not check_prefixes:
         check_prefixes = ['CHECK']
         check_prefixes = ['CHECK']
@@ -377,12 +129,12 @@ def main():
         print >>sys.stderr, 'Extracted LLC cmd: llc ' + llc_args
         print >>sys.stderr, 'Extracted LLC cmd: llc ' + llc_args
         print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
         print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
 
 
-      raw_tool_output = llc(args, llc_args, test)
+      raw_tool_output = common.invoke_tool(args.llc_binary, llc_args, test)
       if not (triple_in_cmd or triple_in_ir):
       if not (triple_in_cmd or triple_in_ir):
         print >>sys.stderr, "Cannot find a triple. Assume 'x86'"
         print >>sys.stderr, "Cannot find a triple. Assume 'x86'"
 
 
-      build_function_body_dictionary(raw_tool_output,
-          triple_in_cmd or triple_in_ir or 'x86', prefixes, func_dict, args)
+      asm.build_function_body_dictionary_for_triple(args, raw_tool_output,
+          triple_in_cmd or triple_in_ir or 'x86', prefixes, func_dict)
 
 
     is_in_function = False
     is_in_function = False
     is_in_function_start = False
     is_in_function_start = False
@@ -398,7 +150,7 @@ def main():
         if input_line == '':
         if input_line == '':
           continue
           continue
         if input_line.lstrip().startswith(';'):
         if input_line.lstrip().startswith(';'):
-          m = CHECK_RE.match(input_line)
+          m = common.CHECK_RE.match(input_line)
           if not m or m.group(1) not in prefix_set:
           if not m or m.group(1) not in prefix_set:
             output_lines.append(input_line)
             output_lines.append(input_line)
             continue
             continue
@@ -408,7 +160,7 @@ def main():
         is_in_function_start = False
         is_in_function_start = False
 
 
       if is_in_function:
       if is_in_function:
-        if should_add_line_to_output(input_line, prefix_set) == True:
+        if common.should_add_line_to_output(input_line, prefix_set):
           # This input line of the function body will go as-is into the output.
           # This input line of the function body will go as-is into the output.
           output_lines.append(input_line)
           output_lines.append(input_line)
         else:
         else:
@@ -417,13 +169,14 @@ def main():
           is_in_function = False
           is_in_function = False
         continue
         continue
 
 
-      if input_line == autogenerated_note:
+      # Discard any previous script advertising.
+      if input_line.startswith(ADVERT):
         continue
         continue
 
 
       # If it's outside a function, it just gets copied to the output.
       # If it's outside a function, it just gets copied to the output.
       output_lines.append(input_line)
       output_lines.append(input_line)
 
 
-      m = IR_FUNCTION_RE.match(input_line)
+      m = common.IR_FUNCTION_RE.match(input_line)
       if not m:
       if not m:
         continue
         continue
       func_name = m.group(1)
       func_name = m.group(1)

+ 16 - 70
utils/update_test_checks.py

@@ -38,76 +38,36 @@ import sys
 import tempfile
 import tempfile
 import re
 import re
 
 
+from UpdateTestChecks import common
+
 ADVERT = '; NOTE: Assertions have been autogenerated by '
 ADVERT = '; NOTE: Assertions have been autogenerated by '
 
 
 # RegEx: this is where the magic happens.
 # RegEx: this is where the magic happens.
 
 
-SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
-SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
-SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
-SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
 SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
 SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
 
 
-RUN_LINE_RE = re.compile('^\s*;\s*RUN:\s*(.*)$')
 IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
 IR_FUNCTION_RE = re.compile('^\s*define\s+(?:internal\s+)?[^@]*@([\w-]+)\s*\(')
 OPT_FUNCTION_RE = re.compile(
 OPT_FUNCTION_RE = re.compile(
     r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
     r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*\('
     r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
     r'(\s+)?[^)]*[^{]*\{\n(?P<body>.*?)^\}$',
     flags=(re.M | re.S))
     flags=(re.M | re.S))
-CHECK_PREFIX_RE = re.compile('--?check-prefix(?:es)?=(\S+)')
-CHECK_RE = re.compile(r'^\s*;\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:')
 # Match things that look at identifiers, but only if they are followed by
 # Match things that look at identifiers, but only if they are followed by
 # spaces, commas, paren, or end of the string
 # spaces, commas, paren, or end of the string
 IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
 IR_VALUE_RE = re.compile(r'(\s+)%([\w\.]+?)([,\s\(\)]|\Z)')
 
 
 
 
-# Invoke the tool that is being tested.
-def invoke_tool(args, cmd_args, ir):
-  with open(ir) as ir_file:
-    stdout = subprocess.check_output(args.opt_binary + ' ' + cmd_args,
-                                     shell=True, stdin=ir_file)
-  # Fix line endings to unix CR style.
-  stdout = stdout.replace('\r\n', '\n')
-  return stdout
-
 
 
 def scrub_body(body, opt_basename):
 def scrub_body(body, opt_basename):
   # Scrub runs of whitespace out of the assembly, but leave the leading
   # Scrub runs of whitespace out of the assembly, but leave the leading
   # whitespace in place.
   # whitespace in place.
-  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
+  body = common.SCRUB_WHITESPACE_RE.sub(r' ', body)
   # Expand the tabs used for indentation.
   # Expand the tabs used for indentation.
   body = string.expandtabs(body, 2)
   body = string.expandtabs(body, 2)
   # Strip trailing whitespace.
   # Strip trailing whitespace.
-  body = SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
+  body = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', body)
   return body
   return body
 
 
 
 
-# Build up a dictionary of all the function bodies.
-def build_function_body_dictionary(raw_tool_output, prefixes, func_dict, verbose, opt_basename):
-  func_regex = OPT_FUNCTION_RE
-  for m in func_regex.finditer(raw_tool_output):
-    if not m:
-      continue
-    func = m.group('func')
-    scrubbed_body = scrub_body(m.group('body'), opt_basename)
-    if func.startswith('stress'):
-      # We only use the last line of the function body for stress tests.
-      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
-    if verbose:
-      print >>sys.stderr, 'Processing function: ' + func
-      for l in scrubbed_body.splitlines():
-        print >>sys.stderr, '  ' + l
-    for prefix in prefixes:
-      if func in func_dict[prefix] and func_dict[prefix][func] != scrubbed_body:
-        if prefix == prefixes[-1]:
-          print >>sys.stderr, ('WARNING: Found conflicting asm under the '
-                               'same prefix: %r!' % (prefix,))
-        else:
-          func_dict[prefix][func] = None
-          continue
-
-      func_dict[prefix][func] = scrubbed_body
-
 
 
 # Create a FileCheck variable name based on an IR name.
 # Create a FileCheck variable name based on an IR name.
 def get_value_name(var):
 def get_value_name(var):
@@ -213,21 +173,6 @@ def add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename):
   return output_lines
   return output_lines
 
 
 
 
-def should_add_line_to_output(input_line, prefix_set):
-  # Skip any blank comment lines in the IR.
-  if input_line.strip() == ';':
-    return False
-  # Skip any blank lines in the IR.
-  #if input_line.strip() == '':
-  #  return False
-  # And skip any CHECK lines. We're building our own.
-  m = CHECK_RE.match(input_line)
-  if m and m.group(1) in prefix_set:
-    return False
-
-  return True
-
-
 def main():
 def main():
   from argparse import RawTextHelpFormatter
   from argparse import RawTextHelpFormatter
   parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
   parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
@@ -254,7 +199,7 @@ def main():
       input_lines = [l.rstrip() for l in f]
       input_lines = [l.rstrip() for l in f]
 
 
     raw_lines = [m.group(1)
     raw_lines = [m.group(1)
-                 for m in [RUN_LINE_RE.match(l) for l in input_lines] if m]
+                 for m in [common.RUN_LINE_RE.match(l) for l in input_lines] if m]
     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
     run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
     for l in raw_lines[1:]:
     for l in raw_lines[1:]:
       if run_lines[-1].endswith("\\"):
       if run_lines[-1].endswith("\\"):
@@ -282,7 +227,7 @@ def main():
       tool_cmd_args = tool_cmd[len(opt_basename):].strip()
       tool_cmd_args = tool_cmd[len(opt_basename):].strip()
       tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
       tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
 
 
-      check_prefixes = [item for m in CHECK_PREFIX_RE.finditer(filecheck_cmd)
+      check_prefixes = [item for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
                                for item in m.group(1).split(',')]
                                for item in m.group(1).split(',')]
       if not check_prefixes:
       if not check_prefixes:
         check_prefixes = ['CHECK']
         check_prefixes = ['CHECK']
@@ -300,8 +245,10 @@ def main():
         print >>sys.stderr, 'Extracted opt cmd: ' + opt_basename + ' ' + opt_args
         print >>sys.stderr, 'Extracted opt cmd: ' + opt_basename + ' ' + opt_args
         print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
         print >>sys.stderr, 'Extracted FileCheck prefixes: ' + str(prefixes)
 
 
-      raw_tool_output = invoke_tool(args, opt_args, test)
-      build_function_body_dictionary(raw_tool_output, prefixes, func_dict, args.verbose, opt_basename)
+      raw_tool_output = common.invoke_tool(args.opt_binary, opt_args, test)
+      common.build_function_body_dictionary(
+              OPT_FUNCTION_RE, scrub_body, [opt_basename], raw_tool_output,
+              prefixes, func_dict, args.verbose)
 
 
     is_in_function = False
     is_in_function = False
     is_in_function_start = False
     is_in_function_start = False
@@ -316,20 +263,20 @@ def main():
         if input_line == '':
         if input_line == '':
           continue
           continue
         if input_line.lstrip().startswith(';'):
         if input_line.lstrip().startswith(';'):
-          m = CHECK_RE.match(input_line)
+          m = common.CHECK_RE.match(input_line)
           if not m or m.group(1) not in prefix_set:
           if not m or m.group(1) not in prefix_set:
             output_lines.append(input_line)
             output_lines.append(input_line)
             continue
             continue
 
 
         # Print out the various check lines here.
         # Print out the various check lines here.
-        output_lines = add_checks(output_lines, prefix_list, func_dict, name, opt_basename)
+        output_lines = add_checks(output_lines, prefix_list, func_dict, func_name, opt_basename)
         is_in_function_start = False
         is_in_function_start = False
 
 
       if is_in_function:
       if is_in_function:
-        if should_add_line_to_output(input_line, prefix_set) == True:
+        if common.should_add_line_to_output(input_line, prefix_set):
           # This input line of the function body will go as-is into the output.
           # This input line of the function body will go as-is into the output.
           # Except make leading whitespace uniform: 2 spaces.
           # Except make leading whitespace uniform: 2 spaces.
-          input_line = SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
+          input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
           output_lines.append(input_line)
           output_lines.append(input_line)
         else:
         else:
           continue
           continue
@@ -347,8 +294,8 @@ def main():
       m = IR_FUNCTION_RE.match(input_line)
       m = IR_FUNCTION_RE.match(input_line)
       if not m:
       if not m:
         continue
         continue
-      name = m.group(1)
-      if args.function is not None and name != args.function:
+      func_name = m.group(1)
+      if args.function is not None and func_name != args.function:
         # When filtering on a specific function, skip all others.
         # When filtering on a specific function, skip all others.
         continue
         continue
       is_in_function = is_in_function_start = True
       is_in_function = is_in_function_start = True
@@ -362,4 +309,3 @@ def main():
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
   main()
   main()
-