git_hyper_blame.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. #!/usr/bin/env python3
  2. # Copyright 2016 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """Wrapper around git blame that ignores certain commits.
  6. """
  7. import argparse
  8. import collections
  9. import logging
  10. import os
  11. import subprocess2
  12. import sys
  13. import git_common
  14. import git_dates
  15. import setup_color
  16. logging.getLogger().setLevel(logging.INFO)
  17. DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
  18. class Commit(object):
  19. """Info about a commit."""
  20. def __init__(self, commithash):
  21. self.commithash = commithash
  22. self.author = None
  23. self.author_mail = None
  24. self.author_time = None
  25. self.author_tz = None
  26. self.committer = None
  27. self.committer_mail = None
  28. self.committer_time = None
  29. self.committer_tz = None
  30. self.summary = None
  31. self.boundary = None
  32. self.previous = None
  33. self.filename = None
  34. def __repr__(self): # pragma: no cover
  35. return '<Commit %s>' % self.commithash
  36. BlameLine = collections.namedtuple(
  37. 'BlameLine', 'commit context lineno_then lineno_now modified')
  38. def parse_blame(blameoutput):
  39. """Parses the output of git blame -p into a data structure."""
  40. lines = blameoutput.split('\n')
  41. i = 0
  42. commits = {}
  43. while i < len(lines):
  44. # Read a commit line and parse it.
  45. line = lines[i]
  46. i += 1
  47. if not line.strip():
  48. continue
  49. commitline = line.split()
  50. commithash = commitline[0]
  51. lineno_then = int(commitline[1])
  52. lineno_now = int(commitline[2])
  53. try:
  54. commit = commits[commithash]
  55. except KeyError:
  56. commit = Commit(commithash)
  57. commits[commithash] = commit
  58. # Read commit details until we find a context line.
  59. while i < len(lines):
  60. line = lines[i]
  61. i += 1
  62. if line.startswith('\t'):
  63. break
  64. try:
  65. key, value = line.split(' ', 1)
  66. except ValueError:
  67. key = line
  68. value = True
  69. setattr(commit, key.replace('-', '_'), value)
  70. context = line[1:]
  71. yield BlameLine(commit, context, lineno_then, lineno_now, False)
  72. def print_table(outbuf, table, align):
  73. """Print a 2D rectangular array, aligning columns with spaces.
  74. Args:
  75. align: string of 'l' and 'r', designating whether each column is
  76. left- or right-aligned.
  77. """
  78. if len(table) == 0:
  79. return
  80. colwidths = None
  81. for row in table:
  82. if colwidths is None:
  83. colwidths = [len(x) for x in row]
  84. else:
  85. colwidths = [max(colwidths[i], len(x)) for i, x in enumerate(row)]
  86. for row in table:
  87. cells = []
  88. for i, cell in enumerate(row):
  89. padding = ' ' * (colwidths[i] - len(cell))
  90. if align[i] == 'r':
  91. cell = padding + cell
  92. elif i < len(row) - 1:
  93. # Do not pad the final column if left-aligned.
  94. cell += padding
  95. cells.append(cell.encode('utf-8', 'replace'))
  96. try:
  97. outbuf.write(b' '.join(cells) + b'\n')
  98. except IOError: # pragma: no cover
  99. # Can happen on Windows if the pipe is closed early.
  100. pass
  101. def pretty_print(outbuf, parsedblame, show_filenames=False):
  102. """Pretty-prints the output of parse_blame."""
  103. table = []
  104. for line in parsedblame:
  105. author_time = git_dates.timestamp_offset_to_datetime(
  106. line.commit.author_time, line.commit.author_tz)
  107. row = [
  108. line.commit.commithash[:8], '(' + line.commit.author,
  109. git_dates.datetime_string(author_time),
  110. str(line.lineno_now) + ('*' if line.modified else '') + ')',
  111. line.context
  112. ]
  113. if show_filenames:
  114. row.insert(1, line.commit.filename)
  115. table.append(row)
  116. print_table(outbuf, table, align='llllrl' if show_filenames else 'lllrl')
  117. def get_parsed_blame(filename, revision='HEAD'):
  118. blame = git_common.blame(filename, revision=revision, porcelain=True)
  119. return list(parse_blame(blame))
  120. # Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
  121. # only the hunk line numbers, not the actual diff contents).
  122. # hunk list contains (old, new) pairs, where old and new are (start, length)
  123. # pairs. A hunk list can also be None (if the diff failed).
  124. diff_hunks_cache = {}
  125. def cache_diff_hunks(oldrev, newrev):
  126. def parse_start_length(s):
  127. # Chop the '-' or '+'.
  128. s = s[1:]
  129. # Length is optional (defaults to 1).
  130. try:
  131. start, length = s.split(',')
  132. except ValueError:
  133. start = s
  134. length = 1
  135. return int(start), int(length)
  136. try:
  137. return diff_hunks_cache[(oldrev, newrev)]
  138. except KeyError:
  139. pass
  140. # Use -U0 to get the smallest possible hunks.
  141. diff = git_common.diff(oldrev, newrev, '-U0')
  142. # Get all the hunks.
  143. hunks = []
  144. for line in diff.split('\n'):
  145. if not line.startswith('@@'):
  146. continue
  147. ranges = line.split(' ', 3)[1:3]
  148. ranges = tuple(parse_start_length(r) for r in ranges)
  149. hunks.append(ranges)
  150. diff_hunks_cache[(oldrev, newrev)] = hunks
  151. return hunks
  152. def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
  153. lineno):
  154. """Computes the approximate movement of a line number between two revisions.
  155. Consider line |lineno| in |filename| at |revision|. This function computes
  156. the line number of that line in |newfilename| at |newrevision|. This is
  157. necessarily approximate.
  158. Args:
  159. filename: The file (within the repo) at |revision|.
  160. newfilename: The name of the same file at |newrevision|.
  161. revision: A git revision.
  162. newrevision: Another git revision. Note: Can be ahead or behind
  163. |revision|.
  164. lineno: Line number within |filename| at |revision|.
  165. Returns:
  166. Line number within |newfilename| at |newrevision|.
  167. """
  168. # This doesn't work that well if there are a lot of line changes within the
  169. # hunk (demonstrated by
  170. # GitHyperBlameLineMotionTest.testIntraHunkLineMotion). A fuzzy heuristic
  171. # that takes the text of the new line and tries to find a deleted line
  172. # within the hunk that mostly matches the new line could help.
  173. # Use the <revision>:<filename> syntax to diff between two blobs. This is
  174. # the only way to diff a file that has been renamed.
  175. old = '%s:%s' % (revision, filename)
  176. new = '%s:%s' % (newrevision, newfilename)
  177. hunks = cache_diff_hunks(old, new)
  178. cumulative_offset = 0
  179. # Find the hunk containing lineno (if any).
  180. for (oldstart, oldlength), (newstart, newlength) in hunks:
  181. cumulative_offset += newlength - oldlength
  182. if lineno >= oldstart + oldlength:
  183. # Not there yet.
  184. continue
  185. if lineno < oldstart:
  186. # Gone too far.
  187. break
  188. # lineno is in [oldstart, oldlength] at revision; [newstart, newlength]
  189. # at newrevision.
  190. # If newlength == 0, newstart will be the line before the deleted hunk.
  191. # Since the line must have been deleted, just return that as the nearest
  192. # line in the new file. Caution: newstart can be 0 in this case.
  193. if newlength == 0:
  194. return max(1, newstart)
  195. newend = newstart + newlength - 1
  196. # Move lineno based on the amount the entire hunk shifted.
  197. lineno = lineno + newstart - oldstart
  198. # Constrain the output within the range [newstart, newend].
  199. return min(newend, max(newstart, lineno))
  200. # Wasn't in a hunk. Figure out the line motion based on the difference in
  201. # length between the hunks seen so far.
  202. return lineno + cumulative_offset
  203. def hyper_blame(outbuf, ignored, filename, revision):
  204. # Map from commit to parsed blame from that commit.
  205. blame_from = {}
  206. filename = os.path.normpath(filename)
  207. def cache_blame_from(filename, commithash):
  208. try:
  209. return blame_from[commithash]
  210. except KeyError:
  211. parsed = get_parsed_blame(filename, commithash)
  212. blame_from[commithash] = parsed
  213. return parsed
  214. try:
  215. parsed = cache_blame_from(filename, git_common.hash_one(revision))
  216. except subprocess2.CalledProcessError as e:
  217. sys.stderr.write(e.stderr.decode())
  218. return e.returncode
  219. new_parsed = []
  220. # We don't show filenames in blame output unless we have to.
  221. show_filenames = False
  222. for line in parsed:
  223. # If a line references an ignored commit, blame that commit's parent
  224. # repeatedly until we find a non-ignored commit.
  225. while line.commit.commithash in ignored:
  226. if line.commit.previous is None:
  227. # You can't ignore the commit that added this file.
  228. break
  229. previouscommit, previousfilename = line.commit.previous.split(
  230. ' ', 1)
  231. parent_blame = cache_blame_from(previousfilename, previouscommit)
  232. if len(parent_blame) == 0:
  233. # The previous version of this file was empty, therefore, you
  234. # can't ignore this commit.
  235. break
  236. # line.lineno_then is the line number in question at line.commit. We
  237. # need to translate that line number so that it refers to the
  238. # position of the same line on previouscommit.
  239. lineno_previous = approx_lineno_across_revs(line.commit.filename,
  240. previousfilename,
  241. line.commit.commithash,
  242. previouscommit,
  243. line.lineno_then)
  244. logging.debug('ignore commit %s on line p%d/t%d/n%d',
  245. line.commit.commithash, lineno_previous,
  246. line.lineno_then, line.lineno_now)
  247. # Get the line at lineno_previous in the parent commit.
  248. assert 1 <= lineno_previous <= len(parent_blame)
  249. newline = parent_blame[lineno_previous - 1]
  250. # Replace the commit and lineno_then, but not the lineno_now or
  251. # context.
  252. line = BlameLine(newline.commit, line.context, newline.lineno_then,
  253. line.lineno_now, True)
  254. logging.debug(' replacing with %r', line)
  255. # If any line has a different filename to the file's current name, turn
  256. # on filename display for the entire blame output. Use normpath to make
  257. # variable consistent across platforms.
  258. if os.path.normpath(line.commit.filename) != filename:
  259. show_filenames = True
  260. new_parsed.append(line)
  261. pretty_print(outbuf, new_parsed, show_filenames=show_filenames)
  262. return 0
  263. def parse_ignore_file(ignore_file):
  264. for line in ignore_file:
  265. line = line.split('#', 1)[0].strip()
  266. if line:
  267. yield line
  268. def main(args, outbuf):
  269. parser = argparse.ArgumentParser(
  270. prog='git hyper-blame',
  271. description='git blame with support for ignoring certain commits.')
  272. parser.add_argument('-i',
  273. metavar='REVISION',
  274. action='append',
  275. dest='ignored',
  276. default=[],
  277. help='a revision to ignore')
  278. parser.add_argument('--ignore-file',
  279. metavar='FILE',
  280. dest='ignore_file',
  281. help='a file containing a list of revisions to ignore')
  282. parser.add_argument(
  283. '--no-default-ignores',
  284. dest='no_default_ignores',
  285. action='store_true',
  286. help='Do not ignore commits from .git-blame-ignore-revs.')
  287. parser.add_argument('revision',
  288. nargs='?',
  289. default='HEAD',
  290. metavar='REVISION',
  291. help='revision to look at')
  292. parser.add_argument('filename', metavar='FILE', help='filename to blame')
  293. args = parser.parse_args(args)
  294. try:
  295. repo_root = git_common.repo_root()
  296. except subprocess2.CalledProcessError as e:
  297. sys.stderr.write(e.stderr.decode())
  298. return e.returncode
  299. # Make filename relative to the repository root, and cd to the root dir (so
  300. # all filenames throughout this script are relative to the root).
  301. filename = os.path.relpath(args.filename, repo_root)
  302. os.chdir(repo_root)
  303. # Normalize filename so we can compare it to other filenames git gives us.
  304. filename = os.path.normpath(filename)
  305. filename = os.path.normcase(filename)
  306. ignored_list = list(args.ignored)
  307. if not args.no_default_ignores and os.path.exists(DEFAULT_IGNORE_FILE_NAME):
  308. with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
  309. ignored_list.extend(parse_ignore_file(ignore_file))
  310. if args.ignore_file:
  311. with open(args.ignore_file) as ignore_file:
  312. ignored_list.extend(parse_ignore_file(ignore_file))
  313. ignored = set()
  314. for c in ignored_list:
  315. try:
  316. ignored.add(git_common.hash_one(c))
  317. except subprocess2.CalledProcessError as e:
  318. # Custom warning string (the message from git-rev-parse is
  319. # inappropriate).
  320. sys.stderr.write('warning: unknown revision \'%s\'.\n' % c)
  321. return hyper_blame(outbuf, ignored, filename, args.revision)
  322. if __name__ == '__main__': # pragma: no cover
  323. setup_color.init()
  324. with git_common.less() as less_input:
  325. sys.exit(main(sys.argv[1:], less_input))