git_hyper_blame.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. #!/usr/bin/env python3
  2. # Copyright 2016 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """Wrapper around git blame that ignores certain commits.
  6. """
  7. import argparse
  8. import collections
  9. import logging
  10. import os
  11. import subprocess2
  12. import sys
  13. import git_common
  14. import git_dates
  15. import setup_color
  16. logging.getLogger().setLevel(logging.INFO)
  17. DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
  18. class Commit(object):
  19. """Info about a commit."""
  20. def __init__(self, commithash):
  21. self.commithash = commithash
  22. self.author = None
  23. self.author_mail = None
  24. self.author_time = None
  25. self.author_tz = None
  26. self.committer = None
  27. self.committer_mail = None
  28. self.committer_time = None
  29. self.committer_tz = None
  30. self.summary = None
  31. self.boundary = None
  32. self.previous = None
  33. self.filename = None
  34. def __repr__(self): # pragma: no cover
  35. return '<Commit %s>' % self.commithash
  36. BlameLine = collections.namedtuple(
  37. 'BlameLine',
  38. 'commit context lineno_then lineno_now modified')
  39. def parse_blame(blameoutput):
  40. """Parses the output of git blame -p into a data structure."""
  41. lines = blameoutput.split('\n')
  42. i = 0
  43. commits = {}
  44. while i < len(lines):
  45. # Read a commit line and parse it.
  46. line = lines[i]
  47. i += 1
  48. if not line.strip():
  49. continue
  50. commitline = line.split()
  51. commithash = commitline[0]
  52. lineno_then = int(commitline[1])
  53. lineno_now = int(commitline[2])
  54. try:
  55. commit = commits[commithash]
  56. except KeyError:
  57. commit = Commit(commithash)
  58. commits[commithash] = commit
  59. # Read commit details until we find a context line.
  60. while i < len(lines):
  61. line = lines[i]
  62. i += 1
  63. if line.startswith('\t'):
  64. break
  65. try:
  66. key, value = line.split(' ', 1)
  67. except ValueError:
  68. key = line
  69. value = True
  70. setattr(commit, key.replace('-', '_'), value)
  71. context = line[1:]
  72. yield BlameLine(commit, context, lineno_then, lineno_now, False)
  73. def print_table(outbuf, table, align):
  74. """Print a 2D rectangular array, aligning columns with spaces.
  75. Args:
  76. align: string of 'l' and 'r', designating whether each column is left- or
  77. right-aligned.
  78. """
  79. if len(table) == 0:
  80. return
  81. colwidths = None
  82. for row in table:
  83. if colwidths is None:
  84. colwidths = [len(x) for x in row]
  85. else:
  86. colwidths = [max(colwidths[i], len(x)) for i, x in enumerate(row)]
  87. for row in table:
  88. cells = []
  89. for i, cell in enumerate(row):
  90. padding = ' ' * (colwidths[i] - len(cell))
  91. if align[i] == 'r':
  92. cell = padding + cell
  93. elif i < len(row) - 1:
  94. # Do not pad the final column if left-aligned.
  95. cell += padding
  96. cells.append(cell.encode('utf-8', 'replace'))
  97. try:
  98. outbuf.write(b' '.join(cells) + b'\n')
  99. except IOError: # pragma: no cover
  100. # Can happen on Windows if the pipe is closed early.
  101. pass
  102. def pretty_print(outbuf, parsedblame, show_filenames=False):
  103. """Pretty-prints the output of parse_blame."""
  104. table = []
  105. for line in parsedblame:
  106. author_time = git_dates.timestamp_offset_to_datetime(
  107. line.commit.author_time, line.commit.author_tz)
  108. row = [line.commit.commithash[:8],
  109. '(' + line.commit.author,
  110. git_dates.datetime_string(author_time),
  111. str(line.lineno_now) + ('*' if line.modified else '') + ')',
  112. line.context]
  113. if show_filenames:
  114. row.insert(1, line.commit.filename)
  115. table.append(row)
  116. print_table(outbuf, table, align='llllrl' if show_filenames else 'lllrl')
  117. def get_parsed_blame(filename, revision='HEAD'):
  118. blame = git_common.blame(filename, revision=revision, porcelain=True)
  119. return list(parse_blame(blame))
  120. # Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
  121. # only the hunk line numbers, not the actual diff contents).
  122. # hunk list contains (old, new) pairs, where old and new are (start, length)
  123. # pairs. A hunk list can also be None (if the diff failed).
  124. diff_hunks_cache = {}
  125. def cache_diff_hunks(oldrev, newrev):
  126. def parse_start_length(s):
  127. # Chop the '-' or '+'.
  128. s = s[1:]
  129. # Length is optional (defaults to 1).
  130. try:
  131. start, length = s.split(',')
  132. except ValueError:
  133. start = s
  134. length = 1
  135. return int(start), int(length)
  136. try:
  137. return diff_hunks_cache[(oldrev, newrev)]
  138. except KeyError:
  139. pass
  140. # Use -U0 to get the smallest possible hunks.
  141. diff = git_common.diff(oldrev, newrev, '-U0')
  142. # Get all the hunks.
  143. hunks = []
  144. for line in diff.split('\n'):
  145. if not line.startswith('@@'):
  146. continue
  147. ranges = line.split(' ', 3)[1:3]
  148. ranges = tuple(parse_start_length(r) for r in ranges)
  149. hunks.append(ranges)
  150. diff_hunks_cache[(oldrev, newrev)] = hunks
  151. return hunks
  152. def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
  153. lineno):
  154. """Computes the approximate movement of a line number between two revisions.
  155. Consider line |lineno| in |filename| at |revision|. This function computes the
  156. line number of that line in |newfilename| at |newrevision|. This is
  157. necessarily approximate.
  158. Args:
  159. filename: The file (within the repo) at |revision|.
  160. newfilename: The name of the same file at |newrevision|.
  161. revision: A git revision.
  162. newrevision: Another git revision. Note: Can be ahead or behind |revision|.
  163. lineno: Line number within |filename| at |revision|.
  164. Returns:
  165. Line number within |newfilename| at |newrevision|.
  166. """
  167. # This doesn't work that well if there are a lot of line changes within the
  168. # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
  169. # A fuzzy heuristic that takes the text of the new line and tries to find a
  170. # deleted line within the hunk that mostly matches the new line could help.
  171. # Use the <revision>:<filename> syntax to diff between two blobs. This is the
  172. # only way to diff a file that has been renamed.
  173. old = '%s:%s' % (revision, filename)
  174. new = '%s:%s' % (newrevision, newfilename)
  175. hunks = cache_diff_hunks(old, new)
  176. cumulative_offset = 0
  177. # Find the hunk containing lineno (if any).
  178. for (oldstart, oldlength), (newstart, newlength) in hunks:
  179. cumulative_offset += newlength - oldlength
  180. if lineno >= oldstart + oldlength:
  181. # Not there yet.
  182. continue
  183. if lineno < oldstart:
  184. # Gone too far.
  185. break
  186. # lineno is in [oldstart, oldlength] at revision; [newstart, newlength] at
  187. # newrevision.
  188. # If newlength == 0, newstart will be the line before the deleted hunk.
  189. # Since the line must have been deleted, just return that as the nearest
  190. # line in the new file. Caution: newstart can be 0 in this case.
  191. if newlength == 0:
  192. return max(1, newstart)
  193. newend = newstart + newlength - 1
  194. # Move lineno based on the amount the entire hunk shifted.
  195. lineno = lineno + newstart - oldstart
  196. # Constrain the output within the range [newstart, newend].
  197. return min(newend, max(newstart, lineno))
  198. # Wasn't in a hunk. Figure out the line motion based on the difference in
  199. # length between the hunks seen so far.
  200. return lineno + cumulative_offset
  201. def hyper_blame(outbuf, ignored, filename, revision):
  202. # Map from commit to parsed blame from that commit.
  203. blame_from = {}
  204. filename = os.path.normpath(filename)
  205. def cache_blame_from(filename, commithash):
  206. try:
  207. return blame_from[commithash]
  208. except KeyError:
  209. parsed = get_parsed_blame(filename, commithash)
  210. blame_from[commithash] = parsed
  211. return parsed
  212. try:
  213. parsed = cache_blame_from(filename, git_common.hash_one(revision))
  214. except subprocess2.CalledProcessError as e:
  215. sys.stderr.write(e.stderr.decode())
  216. return e.returncode
  217. new_parsed = []
  218. # We don't show filenames in blame output unless we have to.
  219. show_filenames = False
  220. for line in parsed:
  221. # If a line references an ignored commit, blame that commit's parent
  222. # repeatedly until we find a non-ignored commit.
  223. while line.commit.commithash in ignored:
  224. if line.commit.previous is None:
  225. # You can't ignore the commit that added this file.
  226. break
  227. previouscommit, previousfilename = line.commit.previous.split(' ', 1)
  228. parent_blame = cache_blame_from(previousfilename, previouscommit)
  229. if len(parent_blame) == 0:
  230. # The previous version of this file was empty, therefore, you can't
  231. # ignore this commit.
  232. break
  233. # line.lineno_then is the line number in question at line.commit. We need
  234. # to translate that line number so that it refers to the position of the
  235. # same line on previouscommit.
  236. lineno_previous = approx_lineno_across_revs(
  237. line.commit.filename, previousfilename, line.commit.commithash,
  238. previouscommit, line.lineno_then)
  239. logging.debug('ignore commit %s on line p%d/t%d/n%d',
  240. line.commit.commithash, lineno_previous, line.lineno_then,
  241. line.lineno_now)
  242. # Get the line at lineno_previous in the parent commit.
  243. assert 1 <= lineno_previous <= len(parent_blame)
  244. newline = parent_blame[lineno_previous - 1]
  245. # Replace the commit and lineno_then, but not the lineno_now or context.
  246. line = BlameLine(newline.commit, line.context, newline.lineno_then,
  247. line.lineno_now, True)
  248. logging.debug(' replacing with %r', line)
  249. # If any line has a different filename to the file's current name, turn on
  250. # filename display for the entire blame output.
  251. # Use normpath to make variable consistent across platforms.
  252. if os.path.normpath(line.commit.filename) != filename:
  253. show_filenames = True
  254. new_parsed.append(line)
  255. pretty_print(outbuf, new_parsed, show_filenames=show_filenames)
  256. return 0
  257. def parse_ignore_file(ignore_file):
  258. for line in ignore_file:
  259. line = line.split('#', 1)[0].strip()
  260. if line:
  261. yield line
  262. def main(args, outbuf):
  263. parser = argparse.ArgumentParser(
  264. prog='git hyper-blame',
  265. description='git blame with support for ignoring certain commits.')
  266. parser.add_argument('-i', metavar='REVISION', action='append', dest='ignored',
  267. default=[], help='a revision to ignore')
  268. parser.add_argument('--ignore-file', metavar='FILE', dest='ignore_file',
  269. help='a file containing a list of revisions to ignore')
  270. parser.add_argument('--no-default-ignores', dest='no_default_ignores',
  271. action='store_true',
  272. help='Do not ignore commits from .git-blame-ignore-revs.')
  273. parser.add_argument('revision', nargs='?', default='HEAD', metavar='REVISION',
  274. help='revision to look at')
  275. parser.add_argument('filename', metavar='FILE', help='filename to blame')
  276. args = parser.parse_args(args)
  277. try:
  278. repo_root = git_common.repo_root()
  279. except subprocess2.CalledProcessError as e:
  280. sys.stderr.write(e.stderr.decode())
  281. return e.returncode
  282. # Make filename relative to the repository root, and cd to the root dir (so
  283. # all filenames throughout this script are relative to the root).
  284. filename = os.path.relpath(args.filename, repo_root)
  285. os.chdir(repo_root)
  286. # Normalize filename so we can compare it to other filenames git gives us.
  287. filename = os.path.normpath(filename)
  288. filename = os.path.normcase(filename)
  289. ignored_list = list(args.ignored)
  290. if not args.no_default_ignores and os.path.exists(DEFAULT_IGNORE_FILE_NAME):
  291. with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
  292. ignored_list.extend(parse_ignore_file(ignore_file))
  293. if args.ignore_file:
  294. with open(args.ignore_file) as ignore_file:
  295. ignored_list.extend(parse_ignore_file(ignore_file))
  296. ignored = set()
  297. for c in ignored_list:
  298. try:
  299. ignored.add(git_common.hash_one(c))
  300. except subprocess2.CalledProcessError as e:
  301. # Custom warning string (the message from git-rev-parse is inappropriate).
  302. sys.stderr.write('warning: unknown revision \'%s\'.\n' % c)
  303. return hyper_blame(outbuf, ignored, filename, args.revision)
  304. if __name__ == '__main__': # pragma: no cover
  305. setup_color.init()
  306. with git_common.less() as less_input:
  307. sys.exit(main(sys.argv[1:], less_input))