123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403 |
- #!/usr/bin/env python3
- # Copyright 2016 The Chromium Authors. All rights reserved.
- # Use of this source code is governed by a BSD-style license that can be
- # found in the LICENSE file.
- """Wrapper around git blame that ignores certain commits.
- """
- import argparse
- import collections
- import logging
- import os
- import subprocess2
- import sys
- import git_common
- import git_dates
- import setup_color
- logging.getLogger().setLevel(logging.INFO)
- DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
- class Commit(object):
- """Info about a commit."""
- def __init__(self, commithash):
- self.commithash = commithash
- self.author = None
- self.author_mail = None
- self.author_time = None
- self.author_tz = None
- self.committer = None
- self.committer_mail = None
- self.committer_time = None
- self.committer_tz = None
- self.summary = None
- self.boundary = None
- self.previous = None
- self.filename = None
- def __repr__(self): # pragma: no cover
- return '<Commit %s>' % self.commithash
- BlameLine = collections.namedtuple(
- 'BlameLine', 'commit context lineno_then lineno_now modified')
- def parse_blame(blameoutput):
- """Parses the output of git blame -p into a data structure."""
- lines = blameoutput.split('\n')
- i = 0
- commits = {}
- while i < len(lines):
- # Read a commit line and parse it.
- line = lines[i]
- i += 1
- if not line.strip():
- continue
- commitline = line.split()
- commithash = commitline[0]
- lineno_then = int(commitline[1])
- lineno_now = int(commitline[2])
- try:
- commit = commits[commithash]
- except KeyError:
- commit = Commit(commithash)
- commits[commithash] = commit
- # Read commit details until we find a context line.
- while i < len(lines):
- line = lines[i]
- i += 1
- if line.startswith('\t'):
- break
- try:
- key, value = line.split(' ', 1)
- except ValueError:
- key = line
- value = True
- setattr(commit, key.replace('-', '_'), value)
- context = line[1:]
- yield BlameLine(commit, context, lineno_then, lineno_now, False)
- def print_table(outbuf, table, align):
- """Print a 2D rectangular array, aligning columns with spaces.
- Args:
- align: string of 'l' and 'r', designating whether each column is
- left- or right-aligned.
- """
- if len(table) == 0:
- return
- colwidths = None
- for row in table:
- if colwidths is None:
- colwidths = [len(x) for x in row]
- else:
- colwidths = [max(colwidths[i], len(x)) for i, x in enumerate(row)]
- for row in table:
- cells = []
- for i, cell in enumerate(row):
- padding = ' ' * (colwidths[i] - len(cell))
- if align[i] == 'r':
- cell = padding + cell
- elif i < len(row) - 1:
- # Do not pad the final column if left-aligned.
- cell += padding
- cells.append(cell.encode('utf-8', 'replace'))
- try:
- outbuf.write(b' '.join(cells) + b'\n')
- except IOError: # pragma: no cover
- # Can happen on Windows if the pipe is closed early.
- pass
- def pretty_print(outbuf, parsedblame, show_filenames=False):
- """Pretty-prints the output of parse_blame."""
- table = []
- for line in parsedblame:
- author_time = git_dates.timestamp_offset_to_datetime(
- line.commit.author_time, line.commit.author_tz)
- row = [
- line.commit.commithash[:8], '(' + line.commit.author,
- git_dates.datetime_string(author_time),
- str(line.lineno_now) + ('*' if line.modified else '') + ')',
- line.context
- ]
- if show_filenames:
- row.insert(1, line.commit.filename)
- table.append(row)
- print_table(outbuf, table, align='llllrl' if show_filenames else 'lllrl')
- def get_parsed_blame(filename, revision='HEAD'):
- blame = git_common.blame(filename, revision=revision, porcelain=True)
- return list(parse_blame(blame))
- # Map from (oldrev, newrev) to hunk list (caching the results of git diff, but
- # only the hunk line numbers, not the actual diff contents).
- # hunk list contains (old, new) pairs, where old and new are (start, length)
- # pairs. A hunk list can also be None (if the diff failed).
- diff_hunks_cache = {}
- def cache_diff_hunks(oldrev, newrev):
- def parse_start_length(s):
- # Chop the '-' or '+'.
- s = s[1:]
- # Length is optional (defaults to 1).
- try:
- start, length = s.split(',')
- except ValueError:
- start = s
- length = 1
- return int(start), int(length)
- try:
- return diff_hunks_cache[(oldrev, newrev)]
- except KeyError:
- pass
- # Use -U0 to get the smallest possible hunks.
- diff = git_common.diff(oldrev, newrev, '-U0')
- # Get all the hunks.
- hunks = []
- for line in diff.split('\n'):
- if not line.startswith('@@'):
- continue
- ranges = line.split(' ', 3)[1:3]
- ranges = tuple(parse_start_length(r) for r in ranges)
- hunks.append(ranges)
- diff_hunks_cache[(oldrev, newrev)] = hunks
- return hunks
- def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
- lineno):
- """Computes the approximate movement of a line number between two revisions.
- Consider line |lineno| in |filename| at |revision|. This function computes
- the line number of that line in |newfilename| at |newrevision|. This is
- necessarily approximate.
- Args:
- filename: The file (within the repo) at |revision|.
- newfilename: The name of the same file at |newrevision|.
- revision: A git revision.
- newrevision: Another git revision. Note: Can be ahead or behind
- |revision|.
- lineno: Line number within |filename| at |revision|.
- Returns:
- Line number within |newfilename| at |newrevision|.
- """
- # This doesn't work that well if there are a lot of line changes within the
- # hunk (demonstrated by
- # GitHyperBlameLineMotionTest.testIntraHunkLineMotion). A fuzzy heuristic
- # that takes the text of the new line and tries to find a deleted line
- # within the hunk that mostly matches the new line could help.
- # Use the <revision>:<filename> syntax to diff between two blobs. This is
- # the only way to diff a file that has been renamed.
- old = '%s:%s' % (revision, filename)
- new = '%s:%s' % (newrevision, newfilename)
- hunks = cache_diff_hunks(old, new)
- cumulative_offset = 0
- # Find the hunk containing lineno (if any).
- for (oldstart, oldlength), (newstart, newlength) in hunks:
- cumulative_offset += newlength - oldlength
- if lineno >= oldstart + oldlength:
- # Not there yet.
- continue
- if lineno < oldstart:
- # Gone too far.
- break
- # lineno is in [oldstart, oldlength] at revision; [newstart, newlength]
- # at newrevision.
- # If newlength == 0, newstart will be the line before the deleted hunk.
- # Since the line must have been deleted, just return that as the nearest
- # line in the new file. Caution: newstart can be 0 in this case.
- if newlength == 0:
- return max(1, newstart)
- newend = newstart + newlength - 1
- # Move lineno based on the amount the entire hunk shifted.
- lineno = lineno + newstart - oldstart
- # Constrain the output within the range [newstart, newend].
- return min(newend, max(newstart, lineno))
- # Wasn't in a hunk. Figure out the line motion based on the difference in
- # length between the hunks seen so far.
- return lineno + cumulative_offset
- def hyper_blame(outbuf, ignored, filename, revision):
- # Map from commit to parsed blame from that commit.
- blame_from = {}
- filename = os.path.normpath(filename)
- def cache_blame_from(filename, commithash):
- try:
- return blame_from[commithash]
- except KeyError:
- parsed = get_parsed_blame(filename, commithash)
- blame_from[commithash] = parsed
- return parsed
- try:
- parsed = cache_blame_from(filename, git_common.hash_one(revision))
- except subprocess2.CalledProcessError as e:
- sys.stderr.write(e.stderr.decode())
- return e.returncode
- new_parsed = []
- # We don't show filenames in blame output unless we have to.
- show_filenames = False
- for line in parsed:
- # If a line references an ignored commit, blame that commit's parent
- # repeatedly until we find a non-ignored commit.
- while line.commit.commithash in ignored:
- if line.commit.previous is None:
- # You can't ignore the commit that added this file.
- break
- previouscommit, previousfilename = line.commit.previous.split(
- ' ', 1)
- parent_blame = cache_blame_from(previousfilename, previouscommit)
- if len(parent_blame) == 0:
- # The previous version of this file was empty, therefore, you
- # can't ignore this commit.
- break
- # line.lineno_then is the line number in question at line.commit. We
- # need to translate that line number so that it refers to the
- # position of the same line on previouscommit.
- lineno_previous = approx_lineno_across_revs(line.commit.filename,
- previousfilename,
- line.commit.commithash,
- previouscommit,
- line.lineno_then)
- logging.debug('ignore commit %s on line p%d/t%d/n%d',
- line.commit.commithash, lineno_previous,
- line.lineno_then, line.lineno_now)
- # Get the line at lineno_previous in the parent commit.
- assert 1 <= lineno_previous <= len(parent_blame)
- newline = parent_blame[lineno_previous - 1]
- # Replace the commit and lineno_then, but not the lineno_now or
- # context.
- line = BlameLine(newline.commit, line.context, newline.lineno_then,
- line.lineno_now, True)
- logging.debug(' replacing with %r', line)
- # If any line has a different filename to the file's current name, turn
- # on filename display for the entire blame output. Use normpath to make
- # variable consistent across platforms.
- if os.path.normpath(line.commit.filename) != filename:
- show_filenames = True
- new_parsed.append(line)
- pretty_print(outbuf, new_parsed, show_filenames=show_filenames)
- return 0
- def parse_ignore_file(ignore_file):
- for line in ignore_file:
- line = line.split('#', 1)[0].strip()
- if line:
- yield line
- def main(args, outbuf):
- parser = argparse.ArgumentParser(
- prog='git hyper-blame',
- description='git blame with support for ignoring certain commits.')
- parser.add_argument('-i',
- metavar='REVISION',
- action='append',
- dest='ignored',
- default=[],
- help='a revision to ignore')
- parser.add_argument('--ignore-file',
- metavar='FILE',
- dest='ignore_file',
- help='a file containing a list of revisions to ignore')
- parser.add_argument(
- '--no-default-ignores',
- dest='no_default_ignores',
- action='store_true',
- help='Do not ignore commits from .git-blame-ignore-revs.')
- parser.add_argument('revision',
- nargs='?',
- default='HEAD',
- metavar='REVISION',
- help='revision to look at')
- parser.add_argument('filename', metavar='FILE', help='filename to blame')
- args = parser.parse_args(args)
- try:
- repo_root = git_common.repo_root()
- except subprocess2.CalledProcessError as e:
- sys.stderr.write(e.stderr.decode())
- return e.returncode
- # Make filename relative to the repository root, and cd to the root dir (so
- # all filenames throughout this script are relative to the root).
- filename = os.path.relpath(args.filename, repo_root)
- os.chdir(repo_root)
- # Normalize filename so we can compare it to other filenames git gives us.
- filename = os.path.normpath(filename)
- filename = os.path.normcase(filename)
- ignored_list = list(args.ignored)
- if not args.no_default_ignores and os.path.exists(DEFAULT_IGNORE_FILE_NAME):
- with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
- ignored_list.extend(parse_ignore_file(ignore_file))
- if args.ignore_file:
- with open(args.ignore_file) as ignore_file:
- ignored_list.extend(parse_ignore_file(ignore_file))
- ignored = set()
- for c in ignored_list:
- try:
- ignored.add(git_common.hash_one(c))
- except subprocess2.CalledProcessError as e:
- # Custom warning string (the message from git-rev-parse is
- # inappropriate).
- sys.stderr.write('warning: unknown revision \'%s\'.\n' % c)
- return hyper_blame(outbuf, ignored, filename, args.revision)
- if __name__ == '__main__': # pragma: no cover
- setup_color.init()
- with git_common.less() as less_input:
- sys.exit(main(sys.argv[1:], less_input))
|