split_cl.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. #!/usr/bin/env python3
  2. # Copyright 2017 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """Splits a branch into smaller branches and uploads CLs."""
  6. import collections
  7. import os
  8. import re
  9. import subprocess2
  10. import sys
  11. import gclient_utils
  12. import git_footers
  13. import scm
  14. import git_common as git
  # Number of CLs above which `git cl split` asks the user for confirmation
  # before uploading. Splits that generate large numbers of CLs have caused
  # infrastructure issues in the past.
  CL_SPLIT_FORCE_LIMIT = 10

  # How many reviewers are listed in the "top reviewers" summary. A split may
  # send many CLs to a single reviewer, so the reviewers with the most CLs sent
  # to them are listed.
  CL_SPLIT_TOP_REVIEWERS = 5

  # The files of one CL together with the OWNERS directories they belong to.
  FilesAndOwnersDirectory = collections.namedtuple(
      'FilesAndOwnersDirectory', ['files', 'owners_directories'])
  def EnsureInGitRepository():
      """Checks that the current directory is inside a git repository.

      Runs `git rev-parse` through the git helper module; an exception is thrown
      if the current directory is not a git repository.
      """
      git.run('rev-parse')
  def CreateBranchForDirectories(prefix, directories, upstream):
      """Creates the split branch for |directories| and checks it out.

      The branch is named |prefix| + "_" + |directories[0]| + "_split" and uses
      |upstream| as its upstream.

      Returns:
        True if the branch was created, False if a branch with that name
        already exists (nothing is created in that case).
      """
      known_branches = frozenset(git.branches(use_limit=False))
      new_branch = '_'.join((prefix, directories[0], 'split'))
      if new_branch not in known_branches:
          git.run('checkout', '-t', upstream, '-b', new_branch)
          return True
      return False
  def FormatDirectoriesForPrinting(directories, prefix=None):
      """Returns a printable representation of |directories|.

      When |prefix| is non-empty it is prepended to every directory first. A
      single directory is returned as a plain string; several directories are
      returned as the str() of the whole collection.
      """
      shown = [prefix + name for name in directories] if prefix else directories
      if len(shown) > 1:
          return str(shown)
      return str(shown[0])
  def FormatDescriptionOrComment(txt, directories):
      """Returns |txt| with every "$directory" placeholder filled in.

      The placeholder is replaced by |directories| as formatted by
      FormatDirectoriesForPrinting, with "/" prepended to each directory.
      """
      formatted_directories = FormatDirectoriesForPrinting(directories,
                                                           prefix='/')
      return txt.replace('$directory', formatted_directories)
  def AddUploadedByGitClSplitToDescription(description):
      """Adds a 'This CL was uploaded by git cl split.' line to |description|.

      The line is added after the description body and before the footers (as
      split by git_footers.split_footers), or at the end of |description| if it
      has no footers. A blank line separates it from preceding body text and
      from the footers that follow.

      Args:
        description: The CL description text.

      Returns:
        The updated description, with lines joined by newlines.
      """
      split_footers = git_footers.split_footers(description)
      body_lines = list(split_footers[0])
      footer_lines = split_footers[1]

      # Keep a blank line between the existing text and the added line. The
      # emptiness check avoids an IndexError when there are no body lines at
      # all (e.g. an empty description).
      if body_lines and body_lines[-1] and not body_lines[-1].isspace():
          body_lines.append('')
      body_lines.append('This CL was uploaded by git cl split.')

      if footer_lines:
          body_lines.append('')
          body_lines.extend(footer_lines)
      return '\n'.join(body_lines)
  def UploadCl(refactor_branch, refactor_branch_upstream, directories, files,
               description, comment, reviewers, changelist, cmd_upload,
               cq_dry_run, enable_auto_submit, topic, repository_root):
      """Uploads a CL with all changes to |files| in |refactor_branch|.

      A new branch is created for |directories|; if that branch already exists
      the directories are skipped and nothing is uploaded.

      Args:
        refactor_branch: Name of the branch that contains the changes to upload.
        refactor_branch_upstream: Name of the upstream of |refactor_branch|.
        directories: Paths to the directories that contain the OWNERS files for
            which to upload a CL.
        files: Iterable of (action, path) pairs to include in the uploaded CL.
            Paths whose action is 'D' are removed; all others are checked out
            from |refactor_branch|.
        description: Description of the uploaded CL.
        comment: Comment to post on the uploaded CL.
        reviewers: A set of reviewers for the CL.
        changelist: The Changelist class.
        cmd_upload: The function associated with the git cl upload command.
        cq_dry_run: If CL uploads should also do a cq dry run.
        enable_auto_submit: If CL uploads should also enable auto submit.
        topic: Topic to associate with uploaded CLs.
        repository_root: Directory that the paths in |files| are joined to.
      """
      branch_created = CreateBranchForDirectories(refactor_branch, directories,
                                                  refactor_branch_upstream)
      if not branch_created:
          print('Skipping ' + FormatDirectoriesForPrinting(directories) +
                ' for which a branch already exists.')
          return

      # Bring every change to |files| onto the new branch: deletions are
      # replayed with `git rm`, everything else is checked out from
      # |refactor_branch|.
      paths_to_remove = []
      paths_to_checkout = []
      for action, path in files:
          full_path = os.path.abspath(os.path.join(repository_root, path))
          bucket = paths_to_remove if action == 'D' else paths_to_checkout
          bucket.append(full_path)
      if paths_to_remove:
          git.run('rm', *paths_to_remove)
      if paths_to_checkout:
          git.run('checkout', refactor_branch, '--', *paths_to_checkout)

      # Commit the changes, passing the description to git through a temporary
      # file.
      with gclient_utils.temporary_file() as message_path:
          gclient_utils.FileWrite(
              message_path, FormatDescriptionOrComment(description, directories))
          git.run('commit', '-F', message_path)

      # Assemble the arguments for `git cl upload`.
      upload_args = ['-f']
      if reviewers:
          upload_args += ['-r', ','.join(sorted(reviewers))]
      if cq_dry_run:
          upload_args += ['--cq-dry-run']
      if not comment:
          upload_args += ['--send-mail']
      if enable_auto_submit:
          upload_args += ['--enable-auto-submit']
      if topic:
          upload_args += ['--topic={}'.format(topic)]

      print('Uploading CL for ' + FormatDirectoriesForPrinting(directories) +
            '...')

      upload_failed = cmd_upload(upload_args) != 0
      if upload_failed:
          print('Uploading failed.')
          print('Note: git cl split has built-in resume capabilities.')
          print('Delete ' + git.current_branch() +
                ' then run git cl split again to resume uploading.')

      # NOTE(review): the comment is posted even when the upload above reported
      # a failure -- confirm that this is intended.
      if comment:
          formatted_comment = FormatDescriptionOrComment(comment, directories)
          changelist().AddComment(formatted_comment, publish=True)
  def GetFilesSplitByOwners(files, max_depth):
      """Returns a map of files split by OWNERS file.

      For each file the search starts at the file's directory (cut down to its
      first |max_depth| path components when |max_depth| >= 1) and moves up
      through the parent directories until it reaches a directory that contains
      an OWNERS file or that is already a key of the result. Paths are used as
      given, so relative paths are looked up from the current working
      directory. If the top of the path is reached without finding an OWNERS
      file, the file is grouped under that topmost directory.

      Args:
        files: Iterable of (action, path) pairs.
        max_depth: The maximum directory depth to search for OWNERS files. A
            value less than 1 means no limit.

      Returns:
        A map where keys are paths to directories containing an OWNERS file and
        values are lists of (action, path) pairs sharing that OWNERS file.
      """
      files_split_by_owners = {}
      for action, path in files:
          # normpath() is important to normalize separators here, in
          # preparation for str.split() below. It would be nicer to use
          # something like pathlib here but alas...
          dir_with_owners = os.path.normpath(os.path.dirname(path))
          if max_depth >= 1:
              dir_with_owners = os.path.join(
                  *dir_with_owners.split(os.path.sep)[:max_depth])

          # Find the closest parent directory with an OWNERS file.
          while (dir_with_owners not in files_split_by_owners
                 and not os.path.isfile(os.path.join(dir_with_owners, 'OWNERS'))):
              parent = os.path.dirname(dir_with_owners)
              if parent == dir_with_owners:
                  # dirname() no longer makes progress ('' or a filesystem
                  # root): stop here instead of looping forever.
                  break
              dir_with_owners = parent

          files_split_by_owners.setdefault(dir_with_owners, []).append(
              (action, path))
      return files_split_by_owners
  def PrintClInfo(cl_index, num_cls, directories, file_paths, description,
                  reviewers, cq_dry_run, enable_auto_submit, topic):
      """Prints info about a CL.

      Args:
        cl_index: The index of this CL in the list of CLs to upload.
        num_cls: The total number of CLs that will be uploaded.
        directories: Paths to directories that contains the OWNERS files for
            which to upload a CL.
        file_paths: A list of files in this CL.
        description: The CL description.
        reviewers: A set of reviewers for this CL.
        cq_dry_run: If the CL should also be sent to CQ dry run.
        enable_auto_submit: If the CL should also have auto submit enabled.
        topic: Topic to set for this CL.
      """
      description_lines = FormatDescriptionOrComment(description,
                                                     directories).splitlines()
      indented_description = '\n'.join(' ' + line for line in description_lines)

      print('CL {}/{}'.format(cl_index, num_cls))
      print('Paths: {}'.format(FormatDirectoriesForPrinting(directories)))
      # Sort the reviewers so the output does not depend on set iteration
      # order; the upload path passes sorted(reviewers) as well.
      print('Reviewers: {}'.format(', '.join(sorted(reviewers))))
      print('Auto-Submit: {}'.format(enable_auto_submit))
      print('CQ Dry Run: {}'.format(cq_dry_run))
      print('Topic: {}'.format(topic))
      print('\n' + indented_description + '\n')
      print('\n'.join(file_paths))
      print()
  def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
              cq_dry_run, enable_auto_submit, max_depth, topic, repository_root):
      """Splits a branch into smaller branches and uploads CLs.

      Args:
        description_file: File containing the description of uploaded CLs.
        comment_file: File containing the comment of uploaded CLs.
        changelist: The Changelist class.
        cmd_upload: The function associated with the git cl upload command.
        dry_run: Whether this is a dry run (no branches or CLs created).
        cq_dry_run: If CL uploads should also do a cq dry run.
        enable_auto_submit: If CL uploads should also enable auto submit.
        max_depth: The maximum directory depth to search for OWNERS files. A
            value less than 1 means no limit.
        topic: Topic to associate with split CLs.
        repository_root: Repository directory passed to the status query and
            used to resolve the paths of the changed files.

      Returns:
        0 in case of success, or when the user declines one of the
        confirmation prompts. 1 in case of error (empty CL or a failed
        subprocess).
      """
      description = AddUploadedByGitClSplitToDescription(
          gclient_utils.FileRead(description_file))
      comment = gclient_utils.FileRead(comment_file) if comment_file else None

      try:
          EnsureInGitRepository()

          cl = changelist()
          upstream = cl.GetCommonAncestorWithUpstream()
          files = [
              (action.strip(), f)
              for action, f in scm.GIT.CaptureStatus(repository_root, upstream)
          ]

          if not files:
              print('Cannot split an empty CL.')
              return 1

          author = git.run('config', 'user.email').strip() or None
          refactor_branch = git.current_branch()
          assert refactor_branch, "Can't run from detached branch."
          refactor_branch_upstream = git.upstream(refactor_branch)
          assert refactor_branch_upstream, \
              "Branch %s must have an upstream." % refactor_branch

          if not CheckDescriptionBugLink(description):
              return 0

          files_split_by_reviewers = SelectReviewersForFiles(
              cl, author, files, max_depth)

          num_cls = len(files_split_by_reviewers)
          print('Will split current branch (' + refactor_branch + ') into ' +
                str(num_cls) + ' CLs.\n')
          # Large splits need an explicit confirmation; dry runs upload nothing
          # and are therefore exempt.
          if not dry_run and num_cls > CL_SPLIT_FORCE_LIMIT:
              print(
                  'This will generate "%r" CLs. This many CLs can potentially'
                  ' generate too much load on the build infrastructure.\n\n'
                  'Please email infra-dev@chromium.org to ensure that this won\'t'
                  ' break anything. The infra team reserves the right to cancel'
                  ' your jobs if they are overloading the CQ.\n\n'
                  '(Alternatively, you can reduce the number of CLs created by'
                  ' using the --max-depth option. Pass --dry-run to examine the'
                  ' CLs which will be created until you are happy with the'
                  ' results.)' % num_cls)
              answer = gclient_utils.AskForData('Proceed? (y/n):')
              if answer.lower() != 'y':
                  return 0

          cls_per_reviewer = collections.defaultdict(int)
          for cl_index, (reviewers, cl_info) in \
                  enumerate(files_split_by_reviewers.items(), 1):
              # Convert reviewers from tuple to set.
              reviewer_set = set(reviewers)
              if dry_run:
                  file_paths = [f for _, f in cl_info.files]
                  PrintClInfo(cl_index, num_cls, cl_info.owners_directories,
                              file_paths, description, reviewer_set, cq_dry_run,
                              enable_auto_submit, topic)
              else:
                  UploadCl(refactor_branch, refactor_branch_upstream,
                           cl_info.owners_directories, cl_info.files,
                           description, comment, reviewer_set, changelist,
                           cmd_upload, cq_dry_run, enable_auto_submit, topic,
                           repository_root)

              for reviewer in reviewers:
                  cls_per_reviewer[reviewer] += 1

          # List the top reviewers that will be sent the most CLs as a result of
          # the split.
          reviewer_rankings = sorted(cls_per_reviewer.items(),
                                     key=lambda item: item[1],
                                     reverse=True)
          print('The top reviewers are:')
          for reviewer, count in reviewer_rankings[:CL_SPLIT_TOP_REVIEWERS]:
              print(f' {reviewer}: {count} CLs')

          # Go back to the original branch.
          git.run('checkout', refactor_branch)

      except subprocess2.CalledProcessError as cpe:
          # The captured stderr may be absent or may be bytes, depending on how
          # the failing command was run; writing either directly to the text
          # stream would raise a TypeError and hide the original failure.
          captured = cpe.stderr
          if isinstance(captured, bytes):
              captured = captured.decode('utf-8', errors='replace')
          if captured:
              sys.stderr.write(captured)
          return 1
      return 0
  def CheckDescriptionBugLink(description):
      """Checks that |description| contains a bug link.

      A bug link is a line that starts with "Bug:" followed by a number,
      optionally preceded by a project name, for example:
        Bug: 123
        Bug: chromium:456

      If no bug link is found, the user is asked whether to proceed anyway.

      Returns:
        True if a bug link is present or the user answered "y" (in either
        case) to the prompt, False otherwise.
      """
      bug_link = re.search(r"^Bug:\s*(?:[a-zA-Z]+:)?[0-9]+", description,
                           re.MULTILINE)
      if bug_link:
          return True
      reply = gclient_utils.AskForData(
          'Description does not include a bug link. Proceed? (y/n):')
      return reply.lower() == 'y'
  def SelectReviewersForFiles(cl, author, files, max_depth):
      """Selects reviewers for the passed-in files.

      The files are first grouped by OWNERS directory; groups for which the
      same reviewers are suggested are then merged.

      Args:
        cl: Changelist class instance
        author: Email of person running 'git cl split'
        files: List of (action, path) pairs
        max_depth: The maximum directory depth to search for OWNERS files.
            A value less than 1 means no limit.

      Returns:
        A dict that maps a tuple of reviewers to a FilesAndOwnersDirectory
        holding the files and the OWNERS directories assigned to them.
      """
      by_reviewers = {}
      owners_groups = GetFilesSplitByOwners(files, max_depth)
      for owners_dir, grouped_files in owners_groups.items():
          # Use '/' as a path separator in the branch name and the CL
          # description and comment.
          display_dir = owners_dir.replace(os.path.sep, '/')
          paths = [path for _, path in grouped_files]
          # The suggestion is turned into a tuple so that it can be used as a
          # dictionary key.
          suggested = tuple(
              cl.owners_client.SuggestOwners(
                  paths, exclude=[author, cl.owners_client.EVERYONE]))
          entry = by_reviewers.setdefault(suggested,
                                          FilesAndOwnersDirectory([], []))
          entry.files.extend(grouped_files)
          entry.owners_directories.append(display_dir)
      return by_reviewers
  310. return info_split_by_reviewers