소스 검색

[git cl split] Add new cluster-based splitting algorithm

This CL adds a new way of generating splittings, which clusters files
based on their directory structure (while respecting `set noparent` in
OWNERS files). The user provides a minimum and maximum acceptable
number of files per CL, and the algorithm attempts to create CLs in
that range with a single unique reviewer for each. I've tested it on
some example CLs, and it seems to work well -- certainly better than
the existing algorithm.

Usage of the new algorithm is triggered by passing the
`--target-range` option to `git cl split`. A second new argument,
`--expect-owners-override`, may also be used to ignore OWNERS during
the clustering algorithm.

Bug: 389069356
Change-Id: I321e6424b2bdb7624b370af23b73759fda261161
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/6324373
Reviewed-by: Josip Sokcevic <sokcevic@chromium.org>
Commit-Queue: Devon Loehr <dloehr@google.com>
Devon Loehr 5 달 전
부모
커밋
9dc0551d52
4개의 변경된 파일: 357줄 추가, 28줄 삭제
  1. 29 5
      git_cl.py
  2. 219 6
      split_cl.py
  3. 44 15
      tests/git_cl_test.py
  4. 65 2
      tests/split_cl_test.py

+ 29 - 5
git_cl.py

@@ -5815,7 +5815,8 @@ def CMDsplit(parser, args):
         action='store_true',
         default=False,
         help='List the files and reviewers for each CL that would '
-        'be created, but don\'t create branches or CLs.')
+        'be created, but don\'t create branches or CLs.\n'
+        'You can pass -s in addition to get a more concise summary.')
     parser.add_option('--cq-dry-run',
                       action='store_true',
                       default=False,
@@ -5834,7 +5835,7 @@ def CMDsplit(parser, args):
         default=True,
         help='Sends your change to the CQ after an approval. Only '
         'works on repos that have the Auto-Submit label '
-        'enabled')
+        'enabled. This is the default option.')
     parser.add_option(
         '--disable-auto-submit',
         action='store_false',
@@ -5859,11 +5860,20 @@ def CMDsplit(parser, args):
     parser.add_option('--topic',
                       default=None,
                       help='Topic to specify when uploading')
+    parser.add_option(
+        '--target-range',
+        type='int',
+        default=None,
+        nargs=2,
+        help='Usage: --target-range <min> <max>\n                  '
+        'Use the alternate splitting algorithm which tries '
+        'to ensure each CL has between <min> and <max> files, inclusive. '
+        'Rarely, some CLs may have fewer files than specified.')
     parser.add_option('--from-file',
                       type='str',
                       default=None,
                       help='If present, load the split CLs from the given file '
-                      'instead of computing a splitting. These file are '
+                      'instead of computing a splitting. These files are '
                       'generated each time the script is run.')
     parser.add_option(
         '-s',
@@ -5881,18 +5891,30 @@ def CMDsplit(parser, args):
         default=None,
         help='If present, all generated CLs will be sent to the specified '
         'reviewer(s) specified, rather than automatically assigned reviewers.\n'
-        'Multiple reviewers can be specified as '
+        'Multiple reviewers can be specified as:                               '
         '--reviewers a@b.com --reviewers c@d.com\n')
     parser.add_option(
         '--no-reviewers',
         action='store_true',
         help='If present, generated CLs will not be assigned reviewers. '
         'Overrides --reviewers.')
+    parser.add_option(
+        '--expect-owners-override',
+        action='store_true',
+        help='If present, the clustering algorithm will group files by '
+        'directory only, without considering ownership.\n'
+        'No effect if --target-range is not passed.\n'
+        'Recommended to be used alongside --reviewers or --no-reviewers.')
     options, _ = parser.parse_args(args)
 
     if not options.description_file and not options.dry_run:
         parser.error('No --description flag specified.')
 
+    if (options.target_range
+            and options.target_range[0] > options.target_range[1]):
+        parser.error('First argument to --target-range cannot '
+                     'be greater than the second argument.')
+
     if options.no_reviewers:
         options.reviewers = []
 
@@ -5903,7 +5925,9 @@ def CMDsplit(parser, args):
                             Changelist, WrappedCMDupload, options.dry_run,
                             options.summarize, options.reviewers,
                             options.cq_dry_run, options.enable_auto_submit,
-                            options.max_depth, options.topic, options.from_file,
+                            options.max_depth, options.topic,
+                            options.target_range,
+                            options.expect_owners_override, options.from_file,
                             settings.GetRoot())
 
 

+ 219 - 6
split_cl.py

@@ -7,6 +7,7 @@
 import collections
 import dataclasses
 import hashlib
+import math
 import os
 import re
 import tempfile
@@ -197,17 +198,24 @@ def FormatDescriptionOrComment(txt, desc):
     return replaced_txt
 
 
def AddUploadedByGitClSplitToDescription(description, is_experimental=False):
    """Append a 'This CL was uploaded by git cl split.' note to |description|.

    The note is inserted before any git footers, or at the end of
    |description| if it has no footers.

    Args:
        description: The CL description text to annotate.
        is_experimental: Truthy when the experimental clustering-based
            splitting algorithm was used; in that case a two-line note
            pointing at crbug.com/389069356 is added instead. (Callers may
            pass the --target-range tuple directly, relying on truthiness.)

    Returns:
        The annotated description as a single string.
    """
    if is_experimental:
        new_lines = [
            'This CL was uploaded by an experimental version of git cl split',
            '(https://crbug.com/389069356).'
        ]
    else:
        new_lines = ['This CL was uploaded by git cl split.']
    split_footers = git_footers.split_footers(description)
    lines = split_footers[0]
    # NOTE(review): assumes the description body has at least one line;
    # lines[-1] would raise IndexError on an empty description — confirm
    # callers always provide non-empty text.
    if lines[-1] and not lines[-1].isspace():
        lines = lines + ['']
    lines = lines + new_lines
    if split_footers[1]:
        lines += [''] + split_footers[1]
    return '\n'.join(lines)
@@ -387,14 +395,16 @@ def PrintSummary(cl_infos, refactor_branch):
             'The infra team reserves the right to cancel '
             'your jobs if they are overloading the CQ.\n\n'
             '(Alternatively, you can reduce the number of CLs created by '
-            'using the --max-depth option. Pass --dry-run to examine the '
+            'using the --max-depth option, or altering the arguments to '
+            '--target-range, as appropriate. Pass --dry-run to examine the '
             'CLs which will be created until you are happy with the '
             'results.)')
 
 
 def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
             summarize, reviewers_override, cq_dry_run, enable_auto_submit,
-            max_depth, topic, from_file, repository_root):
+            max_depth, topic, target_range, expect_owners_override, from_file,
+            repository_root):
     """"Splits a branch into smaller branches and uploads CLs.
 
     Args:
@@ -414,10 +424,10 @@ def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
     Returns:
         0 in case of success. 1 in case of error.
     """
-
     description = LoadDescription(description_file, dry_run)
-    description = AddUploadedByGitClSplitToDescription(description)
 
+    description = AddUploadedByGitClSplitToDescription(
+        description, is_experimental=target_range)
     comment = gclient_utils.FileRead(comment_file) if comment_file else None
 
     EnsureInGitRepository()
@@ -443,6 +453,10 @@ def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
 
     if from_file:
         cl_infos = LoadSplittingFromFile(from_file, files_on_disk=files)
+    elif target_range:
+        min_files, max_files = target_range
+        cl_infos = GroupFilesByDirectory(cl, author, expect_owners_override,
+                                         files, min_files, max_files)
     else:
         files_split_by_reviewers = SelectReviewersForFiles(
             cl, author, files, max_depth)
@@ -783,6 +797,71 @@ def EditSplittingInteractively(
 # Code for the clustering-based splitting algorithm.
 ################################################################################
 
+
def GroupFilesByDirectory(cl, author: str, expect_owners_override: bool,
                          all_files: List[Tuple[str, str]], min_files: int,
                          max_files: int) -> List[CLInfo]:
    """Cluster files by directory and assign one reviewer per cluster.

    Groups the contents of |all_files| into clusters of size between
    |min_files| and |max_files|, inclusive, based on their directory
    structure, then assigns one reviewer to each group to create a CL. If
    |expect_owners_override| is true, considers only the directory
    structure of the files, ignoring ownership.

    May rarely create groups with fewer than |min_files| files, or assign
    multiple reviewers to a single CL.

    Args:
        cl: Changelist class instance, for calling owners methods.
        author: Email of person running the script; never assigned as a
            reviewer.
        all_files: (action, path) pairs describing the changed files.
        min_files: Minimum desired number of files per CL.
        max_files: Maximum allowed number of files per CL.

    Returns:
        A list of CLInfo, one per generated cluster.
    """
    # The clustering algorithm works on bare filenames, so remember each
    # file's action to rebuild (action, path) pairs per cluster later.
    actions_by_file = {}
    file_paths = []
    for (action, file) in all_files:
        actions_by_file[file] = action
        file_paths.append(file)

    reviewers_so_far = []
    cls = []
    # Go through the clusters by path length so that we're likely to choose
    # top-level owners earlier.
    for (directories, files) in sorted(
            ClusterFiles(expect_owners_override, file_paths, min_files,
                         max_files)):
        # Use '/' as a path separator in the branch name and the CL
        # description and comment, regardless of platform.
        directories = [
            directory.replace(os.path.sep, '/') for directory in directories
        ]
        files_with_actions = [(actions_by_file[file], file) for file in files]

        # Try to find a reviewer. If some of the files have noparent set,
        # we'll likely get multiple reviewers. Don't consider reviewers we've
        # already assigned something to.
        # FIXME: Rather than excluding existing reviewers, it would be better
        # to just penalize them, but still choose them over reviewers who have
        # a worse score. At the moment, owners_client doesn't support anything
        # to do with the score.
        reviewers = cl.owners_client.SuggestMinimalOwners(
            files,
            exclude=[author, cl.owners_client.EVERYONE] + reviewers_so_far)

        # Retry without excluding existing reviewers if we couldn't find any.
        # This is very unlikely since there are many fallback owners.
        # NOTE(review): this retry passes |directories| where the first
        # attempt passed |files|, although the comment above only mentions
        # dropping the exclusions — confirm this is intentional and not a
        # typo for |files|.
        if not reviewers:
            reviewers = cl.owners_client.SuggestMinimalOwners(
                directories, exclude=[author, cl.owners_client.EVERYONE])

        reviewers_so_far.extend(reviewers)
        cls.append(
            CLInfo(set(reviewers), files_with_actions,
                   FormatDirectoriesForPrinting(directories)))

    return cls
+
+
 ### Trie Code
 
 
@@ -864,3 +943,137 @@ class DirectoryTrie():
         for subdir in self.subdirectories.values():
             files += subdir.ToList()
         return files
+
+
### Clustering code

# Convenience type: a "bin" represents a collection of files:
# |prefixes| holds the directory prefix(es) the files came from, and
# |files| the file paths themselves. Both elements are string lists.
Bin = collections.namedtuple("Bin", "prefixes files")


def PackFiles(max_size: int, files_to_pack: List[Bin]) -> List[Bin]:
    """Consolidate small bins into as few larger bins as possible.

    Simple bin packing: each output bin holds at most |max_size| files.
    Input bins whose file list alone exceeds |max_size| are split into
    chunks of roughly the expected average bin size.

    Args:
        max_size: Maximum number of files allowed in one output bin.
        files_to_pack: The small bins to consolidate.

    Returns:
        A list of non-empty bins, each containing at most |max_size| files.
    """
    total_files = sum(len(bin.files) for bin in files_to_pack)
    # Guard the division below: with no files there is nothing to pack.
    if total_files == 0:
        return []

    # Guess how many bins we'll need ahead of time so we can spread things
    # between them. We'll add more bins later if necessary.
    expected_bins_needed = math.ceil(total_files / max_size)
    expected_avg_bin_size = math.ceil(total_files / expected_bins_needed)
    bins = [Bin([], []) for _ in range(expected_bins_needed)]

    # Sort by number of files, decreasing, so large groups are placed while
    # bins are still empty.
    sorted_by_num_files = sorted(files_to_pack, key=lambda bin: -len(bin.files))

    # Invariant: the least-filled bin is always the first element of |bins|.
    # This ensures we spread things between bins as much as possible.
    for (prefixes, files) in sorted_by_num_files:
        emptiest = bins[0]
        if len(emptiest.files) + len(files) <= max_size:
            emptiest.prefixes.extend(prefixes)
            emptiest.files.extend(files)
        else:
            # Since the first bin is the emptiest, if we failed to fit in
            # that we don't need to try any others.

            # If these files alone are too large, split them up into
            # groups of size |expected_avg_bin_size|.
            if len(files) > max_size:
                bins.extend([
                    Bin(prefixes, files[i:i + expected_avg_bin_size])
                    for i in range(0, len(files), expected_avg_bin_size)
                ])
            else:
                bins.append(Bin(prefixes, files))

        # Maintain invariant.
        bins.sort(key=lambda bin: len(bin.files))
    return [bin for bin in bins if len(bin.files) > 0]
+
+
def ClusterFiles(expect_owners_override: bool, files: List[str], min_files: int,
                 max_files: int) -> List[Bin]:
    """Group |files| into clusters of |min_files| to |max_files| entries.

    Guarantees that no cluster exceeds |max_files| files, but the size may
    rarely be less than |min_files|. If |expect_owners_override| is true,
    don't consider ownership when clustering, only directory structure.

    Clustering strategy for a given directory:
    1. Try to group each subdirectory independently.
    2. Group any remaining files as follows:
        2a. If there are fewer than |min_files| files and the folder has a
            parent, give up and let the parent folder handle it.
        2b. Otherwise, if there are at most |max_files| files, create one
            cluster.
        2c. Finally, if there are more than |max_files| files, create several
            clusters of at most |max_files| files each.
    """
    trie = DirectoryTrie(expect_owners_override)
    trie.AddFiles([file.split(os.path.sep) for file in files])
    clusters: List[Bin] = []

    def ClusterDirectory(current_dir: DirectoryTrie) -> List[str]:
        """
        Attempt to cluster the files for a directory, by grouping them into
        Bins and appending the bins to |clusters|.
        Returns a list of files that weren't able to be clustered (because
        there weren't at least |min_files| files).
        """
        # Track all the files we need to handle in this directory.
        unclustered_files: List[Bin] = []

        # Record any files that live in this directory directly.
        if len(current_dir.files) > 0:
            unclustered_files.append(
                Bin([current_dir.prefix], current_dir.files))

        # Step 1: Try to cluster each subdirectory independently.
        for subdir in current_dir.subdirectories.values():
            unclustered_files_in_subdir = ClusterDirectory(subdir)
            # If the subdirectory couldn't place all its files, record the
            # leftovers so this level can try to place them.
            if len(unclustered_files_in_subdir) > 0:
                unclustered_files.append(
                    Bin([subdir.prefix], unclustered_files_in_subdir))

        # A flattened list containing just the names of all unclustered files.
        unclustered_files_names_only = [
            file for bin in unclustered_files for file in bin.files
        ]

        if len(unclustered_files_names_only) == 0:
            return []

        # Step 2a: If we don't have enough files for a cluster and it's
        # possible to recurse upward, do so.
        if (len(unclustered_files_names_only) < min_files
                and current_dir.has_parent):
            return unclustered_files_names_only

        # Steps 2b, 2c: Create one or more clusters from the unclustered
        # files by appending to the |clusters| variable in the outer scope.
        nonlocal clusters
        if len(unclustered_files_names_only) <= max_files:
            clusters.append(
                Bin([current_dir.prefix], unclustered_files_names_only))
        else:
            clusters += PackFiles(max_files, unclustered_files)

        return []

    unclustered_paths = ClusterDirectory(trie)
    if len(unclustered_paths) > 0:
        EmitWarning(
            'Not all files were assigned to a CL!\n'
            'This should be impossible, file a bug.\n'
            f'{len(unclustered_paths)} Unassigned files: {unclustered_paths}')

    return clusters

+ 44 - 15
tests/git_cl_test.py

@@ -5706,39 +5706,40 @@ class CMDSplitTestCase(CMDTestCaseBase):
 
     def setUp(self):
         super(CMDTestCaseBase, self).setUp()
-        mock.patch('git_cl.Settings.GetRoot', return_value='root').start()
-
-    @mock.patch("split_cl.SplitCl", return_value=0)
-    @mock.patch("git_cl.OptionParser.error", side_effect=ParserErrorMock)
-    def testDescriptionFlagRequired(self, _, mock_split_cl):
+        self.mock_get_root = mock.patch('git_cl.Settings.GetRoot',
+                                        return_value='root').start()
+        self.mock_split_cl = mock.patch("split_cl.SplitCl",
+                                        return_value=0).start()
+        self.mock_parser_error = mock.patch(
+            "git_cl.OptionParser.error", side_effect=ParserErrorMock).start()
+
+    def testDescriptionFlagRequired(self):
         # --description-file is mandatory...
         self.assertRaises(ParserErrorMock, git_cl.main, ['split'])
-        self.assertEqual(mock_split_cl.call_count, 0)
+        self.assertEqual(self.mock_split_cl.call_count, 0)
 
         self.assertEqual(git_cl.main(['split', '--description=SomeFile.txt']),
                          0)
-        self.assertEqual(mock_split_cl.call_count, 1)
+        self.assertEqual(self.mock_split_cl.call_count, 1)
 
         # ...unless we're doing a dry run
-        mock_split_cl.reset_mock()
+        self.mock_split_cl.reset_mock()
         self.assertEqual(git_cl.main(['split', '-n']), 0)
-        self.assertEqual(mock_split_cl.call_count, 1)
+        self.assertEqual(self.mock_split_cl.call_count, 1)
 
-    @mock.patch("split_cl.SplitCl", return_value=0)
-    @mock.patch("git_cl.OptionParser.error", side_effect=ParserErrorMock)
-    def testReviewerParsing(self, _, mock_split_cl):
+    def testReviewerParsing(self):
         """Make sure we correctly parse various combinations of --reviewers"""
 
         # Helper function to pull out the reviewers arg and compare it
         def testOneSetOfFlags(flags, expected):
             self.assertEqual(git_cl.main(['split', '-n'] + flags), 0)
-            mock_split_cl.assert_called_once()
+            self.mock_split_cl.assert_called_once()
             # It's unfortunate that there's no better way to get the argument
             # than to hardcode its number, unless we switch to using keyword
             # arguments everywhere or pass the options in directly.
-            reviewers_arg = mock_split_cl.call_args.args[6]
+            reviewers_arg = self.mock_split_cl.call_args.args[6]
             self.assertEqual(reviewers_arg, expected)
-            mock_split_cl.reset_mock()
+            self.mock_split_cl.reset_mock()
 
         # If no --reviewers flag is passed, we should get None
         testOneSetOfFlags([], None)
@@ -5750,6 +5751,34 @@ class CMDSplitTestCase(CMDTestCaseBase):
         testOneSetOfFlags(['--no-reviewers'], [])
         testOneSetOfFlags(['--reviewers', 'a@b.com', '--no-reviewers'], [])
 
+    def testTargetRangeParsing(self):
+        # Helper function to pull out the target_range arg and compare it
+        def testOneSetOfFlags(flags, expected):
+            self.assertEqual(git_cl.main(['split', '-n'] + flags), 0)
+            self.mock_split_cl.assert_called_once()
+            # It's unfortunate that there's no better way to get the argument
+            # than to hardcode its number, unless we switch to using keyword
+            # arguments everywhere or pass the options in directly.
+            target_range = self.mock_split_cl.call_args.args[11]
+            self.assertEqual(target_range, expected)
+            self.mock_split_cl.reset_mock()
+
+        def ensureFailure(flags):
+            self.assertRaises(ParserErrorMock, git_cl.main,
+                              (['split', '-n'] + flags))
+
+        testOneSetOfFlags([], None)
+        testOneSetOfFlags(["--target-range", "3", "5"], (3, 5))
+        testOneSetOfFlags(["--target-range", "3", "3"], (3, 3))
+        # Can't have second arg larger than first
+        ensureFailure(["--target-range", "5", "3"])
+        # Need exactly two args
+        ensureFailure(["--target-range"])
+        ensureFailure(["--target-range", "5"])
+        # Only accepts int args
+        ensureFailure(["--target-range", "5", "six"])
+
+
 if __name__ == '__main__':
     logging.basicConfig(
         level=logging.DEBUG if '-v' in sys.argv else logging.ERROR)

+ 65 - 2
tests/split_cl_test.py

@@ -33,17 +33,27 @@ class SplitClTest(unittest.TestCase):
         footers = 'Bug: 12345'
 
         added_line = 'This CL was uploaded by git cl split.'
+        experimental_lines = ("This CL was uploaded by an experimental version "
+                              "of git cl split\n"
+                              "(https://crbug.com/389069356).")
 
         # Description without footers
         self.assertEqual(
             split_cl.AddUploadedByGitClSplitToDescription(description),
             description + added_line)
+
         # Description with footers
         self.assertEqual(
             split_cl.AddUploadedByGitClSplitToDescription(description +
                                                           footers),
             description + added_line + '\n\n' + footers)
 
+        # Description with footers and experimental flag
+        self.assertEqual(
+            split_cl.AddUploadedByGitClSplitToDescription(
+                description + footers, True),
+            description + experimental_lines + '\n\n' + footers)
+
     @mock.patch("split_cl.EmitWarning")
     def testFormatDescriptionOrComment(self, mock_emit_warning):
         description = "Converted use of X to Y in $description."
@@ -308,7 +318,7 @@ class SplitClTest(unittest.TestCase):
 
             split_cl.SplitCl(description_file, None, mock.Mock(), mock.Mock(),
                              dry_run, summarize, reviewers_override, False,
-                             False, None, None, None, None)
+                             False, None, None, None, None, None, None)
 
     # Save for re-use
     files_split_by_reviewers = {
@@ -555,7 +565,6 @@ class SplitClTest(unittest.TestCase):
             self.assertEqual(contents, "".join(written_lines))
             mock_file_write.reset_mock()
 
-
     @mock.patch("os.path.isfile", return_value=False)
     def testDirectoryTrie(self, _):
         """
@@ -590,6 +599,60 @@ class SplitClTest(unittest.TestCase):
         self.assertEqual(trie.subdirectories["a"].subdirectories["b"].prefix,
                          os.path.join("a", "b"))
 
+    @mock.patch("os.path.isfile", return_value=False)
+    def testClusterFiles(self, _):
+        """
+        Make sure ClusterFiles returns sensible results for some sample inputs.
+        """
+
+        def compareClusterOutput(clusters: list[split_cl.Bin],
+                                 file_groups: list[list[str]]):
+            """
+            Ensure that ClusterFiles grouped files the way we expected it to.
+            """
+            clustered_files = sorted([sorted(bin.files) for bin in clusters])
+            file_groups = sorted([sorted(grp) for grp in file_groups])
+            self.assertEqual(clustered_files, file_groups)
+
+        # The clustering code uses OS paths so we need to do the same here
+        path_abc = os.path.join("a", "b", "c.cc")
+        path_abd = os.path.join("a", "b", "d.h")
+        path_aefgh = os.path.join("a", "e", "f", "g", "h.hpp")
+        path_ijk = os.path.join("i", "j", "k.cc")
+        path_ilm = os.path.join("i", "l", "m.cc")
+        path_an = os.path.join("a", "n.cpp")
+        path_top = os.path.join("top.gn")
+        files = [
+            path_abc, path_abd, path_aefgh, path_ijk, path_ilm, path_an,
+            path_top
+        ]
+
+        def checkClustering(min_files, max_files, expected):
+            clusters = split_cl.ClusterFiles(False, files, min_files, max_files)
+            compareClusterOutput(clusters, expected)
+
+        # Each file gets its own cluster
+        individual_files = [[file] for file in files]
+        checkClustering(1, 1, individual_files)
+
+        # Put both entries of a/b in the same cluster, everything else alone
+        ab_together = [[path_abc, path_abd], [path_aefgh], [path_ijk],
+                       [path_ilm], [path_an], [path_top]]
+        checkClustering(1, 2, ab_together)
+        checkClustering(1, 100, ab_together)
+
+        # Groups of 2: a/b, rest of a/, all of i/.
+        a_two_groups = [[path_abc, path_abd], [path_aefgh, path_an],
+                        [path_ijk, path_ilm], [path_top]]
+        checkClustering(2, 2, a_two_groups)
+        checkClustering(3, 3, a_two_groups)
+
+        # Put all of a/ together and all of i/ together.
+        # Don't combine top-level directories with things at the root
+        by_top_level_dir = [[path_abc, path_abd, path_aefgh, path_an],
+                            [path_ijk, path_ilm], [path_top]]
+        checkClustering(3, 5, by_top_level_dir)
+        checkClustering(100, 200, by_top_level_dir)
 
 if __name__ == '__main__':
     unittest.main()