git_hyper_blame.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. #!/usr/bin/env python
  2. # Copyright 2016 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """Wrapper around git blame that ignores certain commits.
  6. """
  7. from __future__ import print_function
  8. from __future__ import unicode_literals
  9. import argparse
  10. import collections
  11. import logging
  12. import os
  13. import subprocess2
  14. import sys
  15. import git_common
  16. import git_dates
  17. import setup_color
  18. logging.getLogger().setLevel(logging.INFO)
  19. DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
  20. class Commit(object):
  21. """Info about a commit."""
  22. def __init__(self, commithash):
  23. self.commithash = commithash
  24. self.author = None
  25. self.author_mail = None
  26. self.author_time = None
  27. self.author_tz = None
  28. self.committer = None
  29. self.committer_mail = None
  30. self.committer_time = None
  31. self.committer_tz = None
  32. self.summary = None
  33. self.boundary = None
  34. self.previous = None
  35. self.filename = None
  36. def __repr__(self): # pragma: no cover
  37. return '<Commit %s>' % self.commithash
  38. BlameLine = collections.namedtuple(
  39. 'BlameLine',
  40. 'commit context lineno_then lineno_now modified')
  41. def parse_blame(blameoutput):
  42. """Parses the output of git blame -p into a data structure."""
  43. lines = blameoutput.split('\n')
  44. i = 0
  45. commits = {}
  46. while i < len(lines):
  47. # Read a commit line and parse it.
  48. line = lines[i]
  49. i += 1
  50. if not line.strip():
  51. continue
  52. commitline = line.split()
  53. commithash = commitline[0]
  54. lineno_then = int(commitline[1])
  55. lineno_now = int(commitline[2])
  56. try:
  57. commit = commits[commithash]
  58. except KeyError:
  59. commit = Commit(commithash)
  60. commits[commithash] = commit
  61. # Read commit details until we find a context line.
  62. while i < len(lines):
  63. line = lines[i]
  64. i += 1
  65. if line.startswith('\t'):
  66. break
  67. try:
  68. key, value = line.split(' ', 1)
  69. except ValueError:
  70. key = line
  71. value = True
  72. setattr(commit, key.replace('-', '_'), value)
  73. context = line[1:]
  74. yield BlameLine(commit, context, lineno_then, lineno_now, False)
  75. def print_table(table, align=None, out=sys.stdout):
  76. """Print a 2D rectangular array, aligning columns with spaces.
  77. Args:
  78. align: Optional string of 'l' and 'r', designating whether each column is
  79. left- or right-aligned. Defaults to left aligned.
  80. """
  81. if len(table) == 0:
  82. return
  83. colwidths = None
  84. for row in table:
  85. if colwidths is None:
  86. colwidths = [len(x) for x in row]
  87. else:
  88. colwidths = [max(colwidths[i], len(x)) for i, x in enumerate(row)]
  89. if align is None: # pragma: no cover
  90. align = 'l' * len(colwidths)
  91. for row in table:
  92. cells = []
  93. for i, cell in enumerate(row):
  94. padding = ' ' * (colwidths[i] - len(cell))
  95. if align[i] == 'r':
  96. cell = padding + cell
  97. elif i < len(row) - 1:
  98. # Do not pad the final column if left-aligned.
  99. cell += padding
  100. cell = cell.encode('utf-8', 'replace')
  101. cells.append(cell)
  102. try:
  103. out.write(b' '.join(cells) + b'\n')
  104. except IOError: # pragma: no cover
  105. # Can happen on Windows if the pipe is closed early.
  106. pass
  107. def pretty_print(parsedblame, show_filenames=False, out=sys.stdout):
  108. """Pretty-prints the output of parse_blame."""
  109. table = []
  110. for line in parsedblame:
  111. author_time = git_dates.timestamp_offset_to_datetime(
  112. line.commit.author_time, line.commit.author_tz)
  113. row = [line.commit.commithash[:8],
  114. '(' + line.commit.author,
  115. git_dates.datetime_string(author_time),
  116. str(line.lineno_now) + ('*' if line.modified else '') + ')',
  117. line.context]
  118. if show_filenames:
  119. row.insert(1, line.commit.filename)
  120. table.append(row)
  121. print_table(table, align='llllrl' if show_filenames else 'lllrl', out=out)
  122. def get_parsed_blame(filename, revision='HEAD'):
  123. blame = git_common.blame(filename, revision=revision, porcelain=True)
  124. return list(parse_blame(blame))
  125. # Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
  126. # only the hunk line numbers, not the actual diff contents).
  127. # hunk list contains (old, new) pairs, where old and new are (start, length)
  128. # pairs. A hunk list can also be None (if the diff failed).
  129. diff_hunks_cache = {}
  130. def cache_diff_hunks(oldrev, newrev):
  131. def parse_start_length(s):
  132. # Chop the '-' or '+'.
  133. s = s[1:]
  134. # Length is optional (defaults to 1).
  135. try:
  136. start, length = s.split(',')
  137. except ValueError:
  138. start = s
  139. length = 1
  140. return int(start), int(length)
  141. try:
  142. return diff_hunks_cache[(oldrev, newrev)]
  143. except KeyError:
  144. pass
  145. # Use -U0 to get the smallest possible hunks.
  146. diff = git_common.diff(oldrev, newrev, '-U0')
  147. # Get all the hunks.
  148. hunks = []
  149. for line in diff.split('\n'):
  150. if not line.startswith('@@'):
  151. continue
  152. ranges = line.split(' ', 3)[1:3]
  153. ranges = tuple(parse_start_length(r) for r in ranges)
  154. hunks.append(ranges)
  155. diff_hunks_cache[(oldrev, newrev)] = hunks
  156. return hunks
  157. def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
  158. lineno):
  159. """Computes the approximate movement of a line number between two revisions.
  160. Consider line |lineno| in |filename| at |revision|. This function computes the
  161. line number of that line in |newfilename| at |newrevision|. This is
  162. necessarily approximate.
  163. Args:
  164. filename: The file (within the repo) at |revision|.
  165. newfilename: The name of the same file at |newrevision|.
  166. revision: A git revision.
  167. newrevision: Another git revision. Note: Can be ahead or behind |revision|.
  168. lineno: Line number within |filename| at |revision|.
  169. Returns:
  170. Line number within |newfilename| at |newrevision|.
  171. """
  172. # This doesn't work that well if there are a lot of line changes within the
  173. # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
  174. # A fuzzy heuristic that takes the text of the new line and tries to find a
  175. # deleted line within the hunk that mostly matches the new line could help.
  176. # Use the <revision>:<filename> syntax to diff between two blobs. This is the
  177. # only way to diff a file that has been renamed.
  178. old = '%s:%s' % (revision, filename)
  179. new = '%s:%s' % (newrevision, newfilename)
  180. hunks = cache_diff_hunks(old, new)
  181. cumulative_offset = 0
  182. # Find the hunk containing lineno (if any).
  183. for (oldstart, oldlength), (newstart, newlength) in hunks:
  184. cumulative_offset += newlength - oldlength
  185. if lineno >= oldstart + oldlength:
  186. # Not there yet.
  187. continue
  188. if lineno < oldstart:
  189. # Gone too far.
  190. break
  191. # lineno is in [oldstart, oldlength] at revision; [newstart, newlength] at
  192. # newrevision.
  193. # If newlength == 0, newstart will be the line before the deleted hunk.
  194. # Since the line must have been deleted, just return that as the nearest
  195. # line in the new file. Caution: newstart can be 0 in this case.
  196. if newlength == 0:
  197. return max(1, newstart)
  198. newend = newstart + newlength - 1
  199. # Move lineno based on the amount the entire hunk shifted.
  200. lineno = lineno + newstart - oldstart
  201. # Constrain the output within the range [newstart, newend].
  202. return min(newend, max(newstart, lineno))
  203. # Wasn't in a hunk. Figure out the line motion based on the difference in
  204. # length between the hunks seen so far.
  205. return lineno + cumulative_offset
  206. def hyper_blame(ignored, filename, revision='HEAD', out=sys.stdout,
  207. err=sys.stderr):
  208. # Map from commit to parsed blame from that commit.
  209. blame_from = {}
  210. def cache_blame_from(filename, commithash):
  211. try:
  212. return blame_from[commithash]
  213. except KeyError:
  214. parsed = get_parsed_blame(filename, commithash)
  215. blame_from[commithash] = parsed
  216. return parsed
  217. try:
  218. parsed = cache_blame_from(filename, git_common.hash_one(revision))
  219. except subprocess2.CalledProcessError as e:
  220. err.write(e.stderr.decode())
  221. return e.returncode
  222. new_parsed = []
  223. # We don't show filenames in blame output unless we have to.
  224. show_filenames = False
  225. for line in parsed:
  226. # If a line references an ignored commit, blame that commit's parent
  227. # repeatedly until we find a non-ignored commit.
  228. while line.commit.commithash in ignored:
  229. if line.commit.previous is None:
  230. # You can't ignore the commit that added this file.
  231. break
  232. previouscommit, previousfilename = line.commit.previous.split(' ', 1)
  233. parent_blame = cache_blame_from(previousfilename, previouscommit)
  234. if len(parent_blame) == 0:
  235. # The previous version of this file was empty, therefore, you can't
  236. # ignore this commit.
  237. break
  238. # line.lineno_then is the line number in question at line.commit. We need
  239. # to translate that line number so that it refers to the position of the
  240. # same line on previouscommit.
  241. lineno_previous = approx_lineno_across_revs(
  242. line.commit.filename, previousfilename, line.commit.commithash,
  243. previouscommit, line.lineno_then)
  244. logging.debug('ignore commit %s on line p%d/t%d/n%d',
  245. line.commit.commithash, lineno_previous, line.lineno_then,
  246. line.lineno_now)
  247. # Get the line at lineno_previous in the parent commit.
  248. assert 1 <= lineno_previous <= len(parent_blame)
  249. newline = parent_blame[lineno_previous - 1]
  250. # Replace the commit and lineno_then, but not the lineno_now or context.
  251. line = BlameLine(newline.commit, line.context, newline.lineno_then,
  252. line.lineno_now, True)
  253. logging.debug(' replacing with %r', line)
  254. # If any line has a different filename to the file's current name, turn on
  255. # filename display for the entire blame output.
  256. if line.commit.filename != filename:
  257. show_filenames = True
  258. new_parsed.append(line)
  259. pretty_print(new_parsed, show_filenames=show_filenames, out=out)
  260. return 0
  261. def parse_ignore_file(ignore_file):
  262. for line in ignore_file:
  263. line = line.split('#', 1)[0].strip()
  264. if line:
  265. yield line
  266. def main(args, stdout=sys.stdout, stderr=sys.stderr):
  267. parser = argparse.ArgumentParser(
  268. prog='git hyper-blame',
  269. description='git blame with support for ignoring certain commits.')
  270. parser.add_argument('-i', metavar='REVISION', action='append', dest='ignored',
  271. default=[], help='a revision to ignore')
  272. parser.add_argument('--ignore-file', metavar='FILE',
  273. type=argparse.FileType('r'), dest='ignore_file',
  274. help='a file containing a list of revisions to ignore')
  275. parser.add_argument('--no-default-ignores', dest='no_default_ignores',
  276. action='store_true',
  277. help='Do not ignore commits from .git-blame-ignore-revs.')
  278. parser.add_argument('revision', nargs='?', default='HEAD', metavar='REVISION',
  279. help='revision to look at')
  280. parser.add_argument('filename', metavar='FILE', help='filename to blame')
  281. args = parser.parse_args(args)
  282. try:
  283. repo_root = git_common.repo_root()
  284. except subprocess2.CalledProcessError as e:
  285. stderr.write(e.stderr.decode())
  286. return e.returncode
  287. # Make filename relative to the repository root, and cd to the root dir (so
  288. # all filenames throughout this script are relative to the root).
  289. filename = os.path.relpath(args.filename, repo_root)
  290. os.chdir(repo_root)
  291. # Normalize filename so we can compare it to other filenames git gives us.
  292. filename = os.path.normpath(filename)
  293. filename = os.path.normcase(filename)
  294. ignored_list = list(args.ignored)
  295. if not args.no_default_ignores and os.path.exists(DEFAULT_IGNORE_FILE_NAME):
  296. with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
  297. ignored_list.extend(parse_ignore_file(ignore_file))
  298. if args.ignore_file:
  299. ignored_list.extend(parse_ignore_file(args.ignore_file))
  300. ignored = set()
  301. for c in ignored_list:
  302. try:
  303. ignored.add(git_common.hash_one(c))
  304. except subprocess2.CalledProcessError as e:
  305. # Custom warning string (the message from git-rev-parse is inappropriate).
  306. stderr.write('warning: unknown revision \'%s\'.\n' % c)
  307. return hyper_blame(ignored, filename, args.revision, out=stdout, err=stderr)
  308. if __name__ == '__main__': # pragma: no cover
  309. setup_color.init()
  310. with git_common.less() as less_input:
  311. sys.exit(main(sys.argv[1:], stdout=less_input))