Browse Source

Add git squash-branch-tree

This script allows squashing a branch and all of its downstream
branches in one command.

This is useful for squashing stacked branches that each have a lot of
commits just before using `git rebase-update`, since rebasing many
unsquashed commits is highly likely to run into rebase conflicts repeatedly.

This should only be used if the user is okay with losing their
git commit history.

Change-Id: Id28a941d898def73f785d791499c7ee0784e0136
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5659262
Reviewed-by: Gavin Mak <gavinmak@google.com>
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
Commit-Queue: Patrick Monette <pmonette@chromium.org>
Patrick Monette 1 year ago
parent
commit
367c6f720d

+ 6 - 0
git-squash-branch-tree

@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+# Copyright 2024 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Source the shared runner script, located by searching PATH for
+# python_runner.sh. NOTE(review): presumably it runs the Python module that
+# matches this wrapper's name -- confirm in python_runner.sh.
+. "$(type -P python_runner.sh)"

+ 132 - 0
git_squash_branch_tree.py

@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+# Copyright 2024 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+'''
+Tool to squash a branch and all of its downstream branches. Useful to avoid
+potential conflicts during a git rebase-update with multiple stacked CLs.
+'''
+
+import argparse
+import collections
+import git_common as git
+import sys
+
+
+# Lists the branches of `tree` (a branch -> upstream branch mapping) that have
+# diverged from their respective upstream branch.
+def get_diverged_branches(tree):
+    # A branch has diverged from its upstream when the merge base of the two is
+    # no longer the upstream's own commit.
+    return [
+        branch for branch, upstream_branch in tree.items()
+        if git.hash_one(upstream_branch) != git.hash_one(
+            git.get_or_create_merge_base(branch))
+    ]
+
+
+# Snapshots the hash of every branch in `tree`, and of every upstream branch it
+# refers to, as they are before any squashing takes place.
+def get_initial_hashes(tree):
+    hashes = {}
+    for pair in tree.items():
+        # `pair` is (branch, upstream_branch); record both ends of the link.
+        for ref in pair:
+            hashes[ref] = git.hash_one(ref)
+    return hashes
+
+
+# Inverts `tree` (branch -> upstream branch) into a mapping from each upstream
+# branch to the list of branches directly downstream of it. The result is a
+# defaultdict, so looking up a branch without downstreams yields an empty list.
+def get_downstream_branches(tree):
+    children_by_upstream = collections.defaultdict(list)
+    for branch in tree:
+        children_by_upstream[tree[branch]].append(branch)
+    return children_by_upstream
+
+
+# Squashes `branch`, first rebasing it on top of the new commit position of its
+# upstream branch. `initial_hashes` maps branch names to the hashes recorded
+# before squashing started.
+def squash_branch(branch, initial_hashes):
+    print(f'Squashing branch {branch}.')
+    # The branch must still be where it was when the hashes were recorded.
+    assert initial_hashes[branch] == git.hash_one(branch)
+
+    parent = git.upstream(branch)
+    parent_hash_before = initial_hashes[parent]
+
+    # Squashing the upstream may have moved it, so replay the commits that
+    # follow the upstream's old position on top of where the upstream is now.
+    rebase_args = ('rebase', '--onto', parent, parent_hash_before, branch,
+                   '--update-refs')
+    git.run(*rebase_args)
+
+    # With the branch in place, check it out and collapse it.
+    git.run('checkout', branch)
+    git.squash_current_branch()
+
+
+# Squashes every branch in the subtree rooted at `branch`, visiting each branch
+# before any of its downstream branches.
+def squash_subtree(branch, initial_hashes, downstream_branches):
+    pending = [branch]
+    while pending:
+        current = pending.pop()
+
+        # The upstream default (e.g. origin/main) is never squashed itself.
+        if current != git.upstream_default():
+            squash_branch(current, initial_hashes)
+
+        # Push the downstream branches in reverse so that they are popped in
+        # their listed order, each one followed by its own subtree.
+        pending.extend(reversed(downstream_branches[current]))
+
+
+# Entry point of `git squash-branch-tree`.
+#
+# Parses `args` (argparse falls back to sys.argv when it is None), checks that
+# the repository is in a state where squashing can proceed, then squashes the
+# requested branch and every branch downstream of it.
+#
+# Returns 0 on success and 1 when any of the checks fails.
+def main(args=None):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--ignore-no-upstream',
+                        action='store_true',
+                        help='Allows proceeding if any branch has no '
+                        'upstreams.')
+    parser.add_argument('--branch',
+                        '-b',
+                        type=str,
+                        default=git.current_branch(),
+                        help='The name of the branch whose subtree must be '
+                        'squashed. Defaults to the current branch.')
+    opts = parser.parse_args(args)
+
+    if git.is_dirty_git_tree('squash-branch-tree'):
+        return 1
+
+    branches_without_upstream, tree = git.get_branch_tree()
+
+    if not opts.ignore_no_upstream and branches_without_upstream:
+        print('Cannot use `git squash-branch-tree` since the following\n'
+              'branches don\'t have an upstream:')
+        for branch in branches_without_upstream:
+            print(f'  - {branch}')
+        print('Use --ignore-no-upstream to ignore this check and proceed.')
+        return 1
+
+    # Squashing looks the branch and its upstream up in the recorded hashes, so
+    # a name that is neither a branch with an upstream nor the upstream default
+    # (e.g. a mistyped name) would otherwise fail deep inside the squashing
+    # code with a lookup error. Report it as a usage error instead.
+    if opts.branch not in tree and opts.branch != git.upstream_default():
+        print('Cannot use `git squash-branch-tree` since the branch\n'
+              f'`{opts.branch}` is not a known branch with an upstream.')
+        return 1
+
+    diverged_branches = get_diverged_branches(tree)
+    if diverged_branches:
+        print('Cannot use `git squash-branch-tree` since the following\n'
+              'branches have diverged from their upstream and could cause\n'
+              'conflicts:')
+        for diverged_branch in diverged_branches:
+            print(f'  - {diverged_branch}')
+        return 1
+
+    # Before doing the squashing, remember the currently checked out branch so
+    # we can go back to it at the end.
+    return_branch = git.current_branch()
+
+    # The hashes must be recorded before anything moves: squashing a branch
+    # relies on the pre-squash position of its upstream.
+    initial_hashes = get_initial_hashes(tree)
+    downstream_branches = get_downstream_branches(tree)
+    squash_subtree(opts.branch, initial_hashes, downstream_branches)
+
+    git.run('checkout', return_branch)
+
+    return 0
+
+
+# Script entry point: forward the command-line arguments to main() and use its
+# return value as the process exit status.
+if __name__ == '__main__':  # pragma: no cover
+    try:
+        sys.exit(main(sys.argv[1:]))
+    except KeyboardInterrupt:
+        # On Ctrl-C, print a short notice and exit with a failure status.
+        sys.stderr.write('interrupted\n')
+        sys.exit(1)

+ 4 - 1
testing_support/git_test_utils.py

@@ -289,6 +289,8 @@ class GitRepo(object):
         Args:
             schema - An instance of GitRepoSchema
         """
+        self.last_commit = None
+
         self.repo_path = os.path.realpath(
             tempfile.mkdtemp(dir=self.BASE_TEMP_DIR))
         self.commit_map = {}
@@ -521,7 +523,8 @@ class GitRepoReadOnlyTestBase(GitRepoSchemaTestBase):
         cls.repo = cls.r_schema.reify()
 
     def setUp(self):
-        self.repo.git('checkout', '-f', self.repo.last_commit)
+        if self.repo.last_commit is not None:
+            self.repo.git('checkout', '-f', self.repo.last_commit)
 
     @classmethod
     def tearDownClass(cls):

+ 130 - 0
tests/git_squash_branch_tree_test.py

@@ -0,0 +1,130 @@
+#!/usr/bin/env vpython3
+# coding=utf-8
+# Copyright 2024 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Tests for git_squash_branch_tree."""
+
+import os
+import sys
+import unittest
+
+DEPOT_TOOLS_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, DEPOT_TOOLS_ROOT)
+
+from testing_support import git_test_utils
+
+import git_squash_branch_tree
+import git_common
+
+git_common.TEST_MODE = True
+
+
+class GitSquashBranchTreeTest(git_test_utils.GitRepoReadWriteTestBase):
+    # Empty repo.
+    REPO_SCHEMA = """
+  """
+
+    def setUp(self):
+        super().setUp()
+
+        # Note: Using the REPO_SCHEMA wouldn't simplify this test so it is not
+        #       used.
+        #
+        # Create a repo with the following schema
+        #
+        # main <- branchA <- branchB
+        #            ^
+        #            \ branchC
+        #
+        # where each branch has 2 commits.
+
+        # The repo is empty. Add the first commit or else most commands don't
+        # work, including `git branch`, which doesn't even show the main branch.
+        self.repo.git('commit', '-m', 'First commit', '--allow-empty')
+
+        # Each entry is (branch, upstream it tracks, files committed on it).
+        # Branches are created in this order, one commit per file.
+        layout = (
+            ('branchA', 'main', ('fileA1', 'fileA2')),
+            ('branchB', 'branchA', ('fileB1', 'fileB2')),
+            ('branchC', 'branchA', ('fileC1', 'fileC2')),
+        )
+        for branch, upstream, filenames in layout:
+            self.repo.git('checkout', '-B', branch, '--track', upstream)
+            for filename in filenames:
+                self._createFileAndCommit(filename)
+
+    def testGitSquashBranchTreeDefaultCurrent(self):
+        self._assertAheadCounts(2, 2, 2)
+
+        # Note: Passing --ignore-no-upstream as this repo has no remote and so
+        # the `main` branch can't have an upstream.
+        self.repo.git('checkout', 'branchB')
+        self.repo.run(git_squash_branch_tree.main, ['--ignore-no-upstream'])
+
+        # Only the checked out branch is squashed.
+        self._assertAheadCounts(2, 1, 2)
+
+    def testGitSquashBranchTreeAll(self):
+        self._assertAheadCounts(2, 2, 2)
+
+        self.repo.run(git_squash_branch_tree.main,
+                      ['--branch', 'branchA', '--ignore-no-upstream'])
+
+        # branchA and both of its downstream branches are squashed.
+        self._assertAheadCounts(1, 1, 1)
+
+    def testGitSquashBranchTreeSingle(self):
+        self._assertAheadCounts(2, 2, 2)
+
+        self.repo.run(git_squash_branch_tree.main,
+                      ['--branch', 'branchB', '--ignore-no-upstream'])
+
+        self._assertAheadCounts(2, 1, 2)
+
+        self.repo.run(git_squash_branch_tree.main,
+                      ['--branch', 'branchC', '--ignore-no-upstream'])
+
+        self._assertAheadCounts(2, 1, 1)
+
+        self.repo.run(git_squash_branch_tree.main,
+                      ['--branch', 'branchA', '--ignore-no-upstream'])
+
+        self._assertAheadCounts(1, 1, 1)
+
+    # Asserts how many commits branchA, branchB and branchC (checked in that
+    # order) are ahead of their respective upstream.
+    def _assertAheadCounts(self, branch_a, branch_b, branch_c):
+        self.assertEqual(self._getCountAheadOfUpstream('branchA'), branch_a)
+        self.assertEqual(self._getCountAheadOfUpstream('branchB'), branch_b)
+        self.assertEqual(self._getCountAheadOfUpstream('branchC'), branch_c)
+
+    # Writes a file with arbitrary contents and commits it to the current
+    # branch.
+    def _createFileAndCommit(self, filename):
+        with self.repo.open(filename, 'w') as f:
+            f.write('content')
+        self.repo.git('add', filename)
+        self.repo.git_commit(f'Added file {filename}')
+
+    # Returns the number of commits by which `branch` is ahead of its upstream.
+    def _getCountAheadOfUpstream(self, branch):
+        revision_range = '%s@{u}..%s' % (branch, branch)
+        result = self.repo.git('rev-list', '--count', revision_range)
+        return int(result.stdout)
+
+
+if __name__ == '__main__':
+    unittest.main()