split_cl.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775
  1. #!/usr/bin/env python3
  2. # Copyright 2017 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """Splits a branch into smaller branches and uploads CLs."""
  6. import collections
  7. import dataclasses
  8. import hashlib
  9. import os
  10. import re
  11. import tempfile
  12. from typing import List, Set, Tuple, Dict, Any
  13. import gclient_utils
  14. import git_footers
  15. import scm
  16. import git_common as git
# If a call to `git cl split` will generate more than this number of CLs, the
# command will prompt the user to make sure they know what they're doing. Large
# numbers of CLs generated by `git cl split` have caused infrastructure issues
# in the past.
# In this file, PrintSummary emits a throttling warning when the number of CLs
# exceeds this value.
CL_SPLIT_FORCE_LIMIT = 10

# The maximum number of top reviewers to list. `git cl split` may send many CLs
# to a single reviewer, so the top reviewers with the most CLs sent to them
# will be listed.
# In this file, SplitCl uses it to slice the reviewer ranking it prints.
CL_SPLIT_TOP_REVIEWERS = 5
  26. def Emit(*msg: str):
  27. """Wrapper for easier mocking during tests"""
  28. print(*msg)
  29. def EmitWarning(*msg: str):
  30. print("Warning: ", *msg)
  31. def HashList(lst: List[Any]) -> str:
  32. """
  33. Hash a list, returning a positive integer. Lists with identical elements
  34. should have the same hash, regardless of order.
  35. """
  36. # We need a bytes-like object for hashlib algorithms
  37. byts = bytes().join(
  38. (action + file).encode() for action, file in sorted(lst))
  39. # No security implication: we just need a deterministic output
  40. hashed = hashlib.sha1(byts)
  41. return hashed.hexdigest()[:10]
  42. FilesAndOwnersDirectory = collections.namedtuple("FilesAndOwnersDirectory",
  43. "files owners_directories")
  44. @dataclasses.dataclass
  45. class CLInfo:
  46. """
  47. Data structure representing a single CL. The script will split the large CL
  48. into a list of these.
  49. Fields:
  50. - reviewers: the reviewers the CL will be sent to.
  51. - files: a list of <action>, <file> pairs in the CL.
  52. Has the same format as `git status`.
  53. - description: a string describing the CL. Typically the list of affected
  54. directories. Only used for replacing $description in
  55. the user-provided CL description.
  56. """
  57. # Have to use default_factory because lists are mutable
  58. reviewers: Set[str] = dataclasses.field(default_factory=set)
  59. files: List[Tuple[str, str]] = dataclasses.field(default_factory=list)
  60. # This is only used for formatting in the CL description, so it just
  61. # has to be convertible to string.
  62. description: Any = ""
  63. def FormatForPrinting(self) -> str:
  64. """
  65. Format the CLInfo for printing to a file in a human-readable format.
  66. """
  67. # Don't quote the reviewer emails in the output
  68. reviewers_str = ", ".join(self.reviewers)
  69. lines = [
  70. f"Reviewers: [{reviewers_str}]", f"Description: {self.description}"
  71. ] + [f"{action}, {file}" for (action, file) in self.files]
  72. return "\n".join(lines)
  73. def CLInfoFromFilesAndOwnersDirectoriesDict(
  74. d: Dict[Tuple[str], FilesAndOwnersDirectory]) -> List[CLInfo]:
  75. """
  76. Transform a dictionary mapping reviewer tuples to FilesAndOwnersDirectories
  77. into a list of CLInfo
  78. """
  79. cl_infos = []
  80. for (reviewers, fod) in d.items():
  81. cl_infos.append(
  82. CLInfo(set(reviewers), fod.files,
  83. FormatDirectoriesForPrinting(fod.owners_directories)))
  84. return cl_infos
  85. def EnsureInGitRepository():
  86. """Throws an exception if the current directory is not a git repository."""
  87. git.run('rev-parse')
  88. def CreateBranchName(prefix: str, files: List[Tuple[str, str]]) -> str:
  89. """
  90. Given a sub-CL as a list of (action, file) pairs, create a unique and
  91. deterministic branch name for it.
  92. The name has the format <prefix>_<dirname>_<hash(files)>_split.
  93. """
  94. file_names = [file for _, file in files]
  95. if len(file_names) == 1:
  96. # Only one file, just use its directory as the common path
  97. common_path = os.path.dirname(file_names[0])
  98. else:
  99. common_path = os.path.commonpath(file_names)
  100. if not common_path:
  101. # Files have nothing in common at all. Unlikely but possible.
  102. common_path = "None"
  103. # Replace path delimiter with underscore in common_path.
  104. common_path = common_path.replace(os.path.sep, '_')
  105. return f"{prefix}_{HashList(files)}_{common_path}_split"
  106. def CreateBranchForOneCL(prefix: str, files: List[Tuple[str, str]],
  107. upstream: str) -> bool:
  108. """Creates a branch named |prefix| + "_" + |hash(files)| + "_split".
  109. Return false if the branch already exists. |upstream| is used as upstream
  110. for the created branch.
  111. """
  112. branches_on_disk = set(git.branches(use_limit=False))
  113. branch_name = CreateBranchName(prefix, files)
  114. if branch_name in branches_on_disk:
  115. return False
  116. git.run('checkout', '-t', upstream, '-b', branch_name)
  117. return True
  118. def ValidateExistingBranches(prefix: str, cl_infos: List[CLInfo]) -> bool:
  119. """
  120. Check if there are splitting branches left over from a previous run.
  121. We only allow branches to exist if we're resuming a previous upload,
  122. in which case we require that the existing branches are a subset of
  123. the branches we're going to generate.
  124. """
  125. branches_on_disk = set(
  126. branch for branch in git.branches(use_limit=False)
  127. if branch.startswith(prefix + "_") and branch.endswith("_split"))
  128. branches_to_be_made = set(
  129. CreateBranchName(prefix, info.files) for info in cl_infos)
  130. if not branches_on_disk.issubset(branches_to_be_made):
  131. Emit("It seems like you've already run `git cl split` on this branch.\n"
  132. "If you're resuming a previous upload, you must pass in the "
  133. "same splitting as before, using the --from-file option.\n"
  134. "If you're starting a new upload, please clean up existing split "
  135. f"branches (starting with '{prefix}_' and ending with '_split'), "
  136. "and re-run the tool.")
  137. Emit("The following branches need to be cleaned up:\n")
  138. for branch in branches_on_disk - branches_to_be_made:
  139. Emit(branch)
  140. return False
  141. return True
  142. def FormatDirectoriesForPrinting(directories: List[str],
  143. prefix: str = None) -> str:
  144. """Formats directory list for printing
  145. Uses dedicated format for single-item list."""
  146. prefixed = directories
  147. if prefix:
  148. prefixed = [(prefix + d) for d in directories]
  149. return str(prefixed[0]) if len(prefixed) == 1 else str(prefixed)
  150. def FormatDescriptionOrComment(txt, desc):
  151. """Replaces $description with |desc| in |txt|."""
  152. # TODO(389069356): Remove support for $directory entirely once it's been
  153. # deprecated for a while.
  154. replaced_txt = txt.replace('$directory', desc)
  155. if txt != replaced_txt:
  156. EmitWarning('Usage of $directory is deprecated and will be removed '
  157. 'in a future update. Please use $description instead, '
  158. 'which has the same behavior by default.\n\n')
  159. replaced_txt = replaced_txt.replace('$description', desc)
  160. return replaced_txt
  161. def AddUploadedByGitClSplitToDescription(description):
  162. """Adds a 'This CL was uploaded by git cl split.' line to |description|.
  163. The line is added before footers, or at the end of |description| if it has
  164. no footers.
  165. """
  166. split_footers = git_footers.split_footers(description)
  167. lines = split_footers[0]
  168. if lines[-1] and not lines[-1].isspace():
  169. lines = lines + ['']
  170. lines = lines + ['This CL was uploaded by git cl split.']
  171. if split_footers[1]:
  172. lines += [''] + split_footers[1]
  173. return '\n'.join(lines)
def UploadCl(refactor_branch, refactor_branch_upstream, cl_description, files,
             user_description, saved_splitting_file, comment, reviewers,
             changelist, cmd_upload, cq_dry_run, enable_auto_submit, topic,
             repository_root):
    """Uploads a CL with all changes to |files| in |refactor_branch|.

    Creates a dedicated split branch, brings over the changes to |files|,
    commits them with the formatted description, and uploads. If the split
    branch already exists, the CL is skipped.

    Args:
        refactor_branch: Name of the branch that contains the changes to upload.
        refactor_branch_upstream: Name of the upstream of |refactor_branch|.
        cl_description: Description of this specific CL, e.g. the list of
            affected directories.
        files: List of (action, file) pairs to include in the uploaded CL.
            Files whose action is 'D' are removed with `git rm`; all others
            are checked out from |refactor_branch|.
        user_description: Description provided by user.
        saved_splitting_file: Path of the file the splitting was saved to.
            Only used in the resume instructions printed when the upload
            fails.
        comment: Comment to post on the uploaded CL.
        reviewers: A set of reviewers for the CL.
        changelist: The Changelist class.
        cmd_upload: The function associated with the git cl upload command.
        cq_dry_run: If CL uploads should also do a cq dry run.
        enable_auto_submit: If CL uploads should also enable auto submit.
        topic: Topic to associate with uploaded CLs.
        repository_root: Directory that the paths in |files| are joined onto
            to build absolute paths.
    """
    # Create a branch.
    if not CreateBranchForOneCL(refactor_branch, files,
                                refactor_branch_upstream):
        Emit(
            f'Skipping existing branch for CL with description: {cl_description}'
        )
        return

    # Checkout all changes to files in |files|.
    deleted_files = []
    modified_files = []
    for action, f in files:
        abspath = os.path.abspath(os.path.join(repository_root, f))
        if action == 'D':
            deleted_files.append(abspath)
        else:
            modified_files.append(abspath)

    if deleted_files:
        git.run(*['rm'] + deleted_files)
    if modified_files:
        git.run(*['checkout', refactor_branch, '--'] + modified_files)

    # Commit changes. The temporary file is created with delete=False so that it
    # can be deleted manually after git has read it rather than automatically
    # when it is closed.
    # NOTE(review): the delete=False detail lives inside
    # gclient_utils.temporary_file and is not visible here -- confirm.
    with gclient_utils.temporary_file() as tmp_file:
        gclient_utils.FileWrite(
            tmp_file,
            FormatDescriptionOrComment(user_description, cl_description))
        git.run('commit', '-F', tmp_file)

    # Upload a CL.
    upload_args = ['-f']
    if reviewers:
        # Sorted so the reviewer argument is deterministic.
        upload_args.extend(['-r', ','.join(sorted(reviewers))])
    if cq_dry_run:
        upload_args.append('--cq-dry-run')
    # --send-mail is only passed when no comment will be posted afterwards.
    if not comment:
        upload_args.append('--send-mail')
    if enable_auto_submit:
        upload_args.append('--enable-auto-submit')
    if topic:
        upload_args.append('--topic={}'.format(topic))
    Emit(f'Uploading CL with description: {cl_description} ...')

    ret = cmd_upload(upload_args)
    if ret != 0:
        # A failed upload is reported but does not abort; execution continues
        # below.
        Emit('Uploading failed.')
        Emit('Note: git cl split has built-in resume capabilities.')
        Emit(f'Delete {git.current_branch()} then run\n'
             f'git cl split --from-file={saved_splitting_file}\n'
             'to resume uploading.')

    if comment:
        changelist().AddComment(FormatDescriptionOrComment(
            comment, cl_description),
                                publish=True)
  246. def GetFilesSplitByOwners(files, max_depth):
  247. """Returns a map of files split by OWNERS file.
  248. Returns:
  249. A map where keys are paths to directories containing an OWNERS file and
  250. values are lists of files sharing an OWNERS file.
  251. """
  252. files_split_by_owners = {}
  253. for action, path in files:
  254. # normpath() is important to normalize separators here, in prepration
  255. # for str.split() before. It would be nicer to use something like
  256. # pathlib here but alas...
  257. dir_with_owners = os.path.normpath(os.path.dirname(path))
  258. if max_depth >= 1:
  259. dir_with_owners = os.path.join(
  260. *dir_with_owners.split(os.path.sep)[:max_depth])
  261. # Find the closest parent directory with an OWNERS file.
  262. while (dir_with_owners not in files_split_by_owners
  263. and not os.path.isfile(os.path.join(dir_with_owners, 'OWNERS'))):
  264. dir_with_owners = os.path.dirname(dir_with_owners)
  265. files_split_by_owners.setdefault(dir_with_owners, []).append(
  266. (action, path))
  267. return files_split_by_owners
  268. def PrintClInfo(cl_index, num_cls, cl_description, file_paths, user_description,
  269. reviewers, cq_dry_run, enable_auto_submit, topic):
  270. """Prints info about a CL.
  271. Args:
  272. cl_index: The index of this CL in the list of CLs to upload.
  273. num_cls: The total number of CLs that will be uploaded.
  274. cl_description: Description of this specific CL, e.g. the list of
  275. affected directories.
  276. file_paths: A list of files in this CL.
  277. user_description: Description provided by user.
  278. reviewers: A set of reviewers for this CL.
  279. cq_dry_run: If the CL should also be sent to CQ dry run.
  280. enable_auto_submit: If the CL should also have auto submit enabled.
  281. topic: Topic to set for this CL.
  282. """
  283. description_lines = FormatDescriptionOrComment(user_description,
  284. cl_description).splitlines()
  285. indented_description = '\n'.join([' ' + l for l in description_lines])
  286. Emit('CL {}/{}'.format(cl_index, num_cls))
  287. Emit('Paths: {}'.format(cl_description))
  288. Emit('Reviewers: {}'.format(', '.join(reviewers)))
  289. Emit('Auto-Submit: {}'.format(enable_auto_submit))
  290. Emit('CQ Dry Run: {}'.format(cq_dry_run))
  291. Emit('Topic: {}'.format(topic))
  292. Emit('\n' + indented_description + '\n')
  293. Emit('\n'.join(file_paths))
  294. def LoadDescription(description_file, dry_run):
  295. if not description_file:
  296. if not dry_run:
  297. # Parser checks this as well, so should be impossible
  298. raise ValueError(
  299. "Must provide a description file except during dry runs")
  300. return ('Dummy description for dry run.\n'
  301. 'description = $description')
  302. return gclient_utils.FileRead(description_file)
  303. def PrintSummary(cl_infos, refactor_branch):
  304. """Print a brief summary of the splitting so the user
  305. can review it before uploading.
  306. Args:
  307. files_split_by_reviewers: A dictionary mapping reviewer tuples
  308. to the files and directories assigned to them.
  309. """
  310. for info in cl_infos:
  311. Emit(f'Reviewers: {info.reviewers}, files: {len(info.files)}, '
  312. f'description: {info.description}')
  313. num_cls = len(cl_infos)
  314. Emit(f'\nWill split branch {refactor_branch} into {num_cls} CLs. '
  315. 'Please quickly review them before proceeding.\n')
  316. if (num_cls > CL_SPLIT_FORCE_LIMIT):
  317. EmitWarning(
  318. 'Uploading this many CLs may potentially '
  319. 'reach the limit of concurrent runs, imposed on you by the '
  320. 'build infrastructure. Your runs may be throttled as a '
  321. 'result.\n\nPlease email infra-dev@chromium.org if you '
  322. 'have any questions. '
  323. 'The infra team reserves the right to cancel '
  324. 'your jobs if they are overloading the CQ.\n\n'
  325. '(Alternatively, you can reduce the number of CLs created by '
  326. 'using the --max-depth option. Pass --dry-run to examine the '
  327. 'CLs which will be created until you are happy with the '
  328. 'results.)')
  329. def SplitCl(description_file, comment_file, changelist, cmd_upload, dry_run,
  330. summarize, reviewers_override, cq_dry_run, enable_auto_submit,
  331. max_depth, topic, from_file, repository_root):
  332. """"Splits a branch into smaller branches and uploads CLs.
  333. Args:
  334. description_file: File containing the description of uploaded CLs.
  335. comment_file: File containing the comment of uploaded CLs.
  336. changelist: The Changelist class.
  337. cmd_upload: The function associated with the git cl upload command.
  338. dry_run: Whether this is a dry run (no branches or CLs created).
  339. reviewers_override: Either None or a (possibly empty) list of reviewers
  340. all CLs should be sent to.
  341. cq_dry_run: If CL uploads should also do a cq dry run.
  342. enable_auto_submit: If CL uploads should also enable auto submit.
  343. max_depth: The maximum directory depth to search for OWNERS files. A
  344. value less than 1 means no limit.
  345. topic: Topic to associate with split CLs.
  346. Returns:
  347. 0 in case of success. 1 in case of error.
  348. """
  349. description = LoadDescription(description_file, dry_run)
  350. description = AddUploadedByGitClSplitToDescription(description)
  351. comment = gclient_utils.FileRead(comment_file) if comment_file else None
  352. EnsureInGitRepository()
  353. cl = changelist()
  354. upstream = cl.GetCommonAncestorWithUpstream()
  355. files = [(action.strip(), f)
  356. for action, f in scm.GIT.CaptureStatus(repository_root, upstream)]
  357. if not files:
  358. Emit('Cannot split an empty CL.')
  359. return 1
  360. author = git.run('config', 'user.email').strip() or None
  361. refactor_branch = git.current_branch()
  362. assert refactor_branch, "Can't run from detached branch."
  363. refactor_branch_upstream = git.upstream(refactor_branch)
  364. assert refactor_branch_upstream, \
  365. "Branch %s must have an upstream." % refactor_branch
  366. if not dry_run and not CheckDescriptionBugLink(description):
  367. return 0
  368. if from_file:
  369. cl_infos = LoadSplittingFromFile(from_file, files_on_disk=files)
  370. else:
  371. files_split_by_reviewers = SelectReviewersForFiles(
  372. cl, author, files, max_depth)
  373. cl_infos = CLInfoFromFilesAndOwnersDirectoriesDict(
  374. files_split_by_reviewers)
  375. # Note that we do this override even if the list is empty (indicating that
  376. # the user requested CLs not be assigned to any reviewers).
  377. if reviewers_override != None:
  378. for info in cl_infos:
  379. info.reviewers = set(reviewers_override)
  380. if not dry_run:
  381. PrintSummary(cl_infos, refactor_branch)
  382. answer = gclient_utils.AskForData(
  383. 'Proceed? (y/N, or i to edit interactively): ')
  384. if answer.lower() == 'i':
  385. cl_infos, saved_splitting_file = EditSplittingInteractively(
  386. cl_infos, files_on_disk=files)
  387. else:
  388. # Save even if we're continuing, so the user can safely resume an
  389. # aborted upload with the same splitting
  390. saved_splitting_file = SaveSplittingToTempFile(cl_infos)
  391. if answer.lower() != 'y':
  392. return 0
  393. # Make sure there isn't any clutter left over from a previous run
  394. if not ValidateExistingBranches(refactor_branch, cl_infos):
  395. return 0
  396. elif summarize:
  397. PrintSummary(cl_infos, refactor_branch)
  398. cls_per_reviewer = collections.defaultdict(int)
  399. for cl_index, cl_info in enumerate(cl_infos, 1):
  400. if dry_run and summarize:
  401. pass
  402. elif dry_run:
  403. file_paths = [f for _, f in cl_info.files]
  404. PrintClInfo(cl_index, len(cl_infos), cl_info.description,
  405. file_paths, description, cl_info.reviewers, cq_dry_run,
  406. enable_auto_submit, topic)
  407. else:
  408. UploadCl(refactor_branch, refactor_branch_upstream,
  409. cl_info.description, cl_info.files, description,
  410. saved_splitting_file, comment, cl_info.reviewers,
  411. changelist, cmd_upload, cq_dry_run, enable_auto_submit,
  412. topic, repository_root)
  413. for reviewer in cl_info.reviewers:
  414. cls_per_reviewer[reviewer] += 1
  415. # List the top reviewers that will be sent the most CLs as a result of
  416. # the split.
  417. reviewer_rankings = sorted(cls_per_reviewer.items(),
  418. key=lambda item: item[1],
  419. reverse=True)
  420. Emit('The top reviewers are:')
  421. for reviewer, count in reviewer_rankings[:CL_SPLIT_TOP_REVIEWERS]:
  422. Emit(f' {reviewer}: {count} CLs')
  423. if dry_run:
  424. # Wait until now to save the splitting so the file name doesn't get
  425. # washed away by the flood of dry-run printing.
  426. SaveSplittingToTempFile(cl_infos)
  427. # Go back to the original branch.
  428. git.run('checkout', refactor_branch)
  429. return 0
  430. def CheckDescriptionBugLink(description):
  431. """Verifies that the description contains a bug link.
  432. Examples:
  433. Bug: 123
  434. Bug: chromium:456
  435. Prompts user if the description does not contain a bug link.
  436. """
  437. bug_pattern = re.compile(r"^Bug:\s*(?:[a-zA-Z]+:)?[0-9]+", re.MULTILINE)
  438. matches = re.findall(bug_pattern, description)
  439. answer = 'y'
  440. if not matches:
  441. answer = gclient_utils.AskForData(
  442. 'Description does not include a bug link. Proceed? (y/N):')
  443. return answer.lower() == 'y'
  444. def SelectReviewersForFiles(cl, author, files, max_depth):
  445. """Selects reviewers for passed-in files
  446. Args:
  447. cl: Changelist class instance
  448. author: Email of person running 'git cl split'
  449. files: List of files
  450. max_depth: The maximum directory depth to search for OWNERS files.
  451. A value less than 1 means no limit.
  452. """
  453. info_split_by_owners = GetFilesSplitByOwners(files, max_depth)
  454. info_split_by_reviewers = {}
  455. for (directory, split_files) in info_split_by_owners.items():
  456. # Use '/' as a path separator in the branch name and the CL description
  457. # and comment.
  458. directory = directory.replace(os.path.sep, '/')
  459. file_paths = [f for _, f in split_files]
  460. # Convert reviewers list to tuple in order to use reviewers as key to
  461. # dictionary.
  462. reviewers = tuple(
  463. cl.owners_client.SuggestOwners(
  464. file_paths, exclude=[author, cl.owners_client.EVERYONE]))
  465. if not reviewers in info_split_by_reviewers:
  466. info_split_by_reviewers[reviewers] = FilesAndOwnersDirectory([], [])
  467. info_split_by_reviewers[reviewers].files.extend(split_files)
  468. info_split_by_reviewers[reviewers].owners_directories.append(directory)
  469. return info_split_by_reviewers
  470. def SaveSplittingToFile(cl_infos: List[CLInfo], filename: str, silent=False):
  471. """
  472. Writes the listed CLs to the designated file, in a human-readable and
  473. editable format. Include an explanation of the file format at the top,
  474. as well as instructions for how to use it.
  475. """
  476. preamble = (
  477. "# CLs in this file must have the following format:\n"
  478. "# A 'Reviewers: [...]' line, where '...' is a (possibly empty) list "
  479. "of reviewer emails.\n"
  480. "# A 'Description: ...' line, where '...' is any string (by default, "
  481. "the list of directories the files have been pulled from).\n"
  482. "# One or more file lines, consisting of an <action>, <file> pair, in "
  483. "the format output by `git status`.\n\n"
  484. "# Each 'Reviewers' line begins a new CL.\n"
  485. "# To use the splitting in this file, use the --from-file option.\n\n")
  486. cl_string = "\n\n".join([info.FormatForPrinting() for info in cl_infos])
  487. gclient_utils.FileWrite(filename, preamble + cl_string)
  488. if not silent:
  489. Emit(f"Saved splitting to {filename}")
  490. def SaveSplittingToTempFile(cl_infos: List[CLInfo], silent=False):
  491. """
  492. Create a file in the user's temp directory, and save the splitting there.
  493. """
  494. # We can't use gclient_utils.temporary_file because it will be removed
  495. temp_file, temp_name = tempfile.mkstemp(prefix="split_cl_")
  496. os.close(temp_file) # Necessary for windows
  497. SaveSplittingToFile(cl_infos, temp_name, silent)
  498. return temp_name
  499. class ClSplitParseError(Exception):
  500. pass
# Matches 'Reviewers: [...]', extracts the ...
# (group 1 is everything between the square brackets, possibly empty)
reviewers_re = re.compile(r'Reviewers:\s*\[([^\]]*)\]')
# Matches 'Description: ...', extracts the ...
# (group 1 must contain at least one character)
description_re = re.compile(r'Description:\s*(.+)')
# Matches '<action>, <file>', and extracts both
# <action> must be a valid code (either 1 or 2 letters, each one of
# M, T, A, D, R or C)
file_re = re.compile(r'([MTADRC]{1,2}),\s*(.+)')
  508. # We use regex parsing instead of e.g. json because it lets us use a much more
  509. # human-readable format, similar to the summary printed in dry runs
  510. def ParseSplittings(lines: List[str]) -> List[CLInfo]:
  511. """
  512. Parse a splitting file. We expect to get a series of lines in the format
  513. of CLInfo.FormatForPrinting. In the following order, we expect to see
  514. - A 'Reviewers: ' line containing a list,
  515. - A 'Description: ' line containing anything, and
  516. - A list of <action>, <path> pairs, each on its own line
  517. Note that this function only transforms the file into a list of CLInfo
  518. (if possible). It does not validate the information; for that, see
  519. ValidateSplitting.
  520. """
  521. cl_infos = []
  522. current_cl_info = None
  523. for line in lines:
  524. line = line.strip()
  525. # Skip empty or commented lines
  526. if not line or line.startswith('#'):
  527. continue
  528. # Start a new CL whenever we see a new Reviewers: line
  529. m = re.fullmatch(reviewers_re, line)
  530. if m:
  531. reviewers_str = m.group(1)
  532. reviewers = [r.strip() for r in reviewers_str.split(",")]
  533. # Account for empty list or trailing comma
  534. if not reviewers[-1]:
  535. reviewers = reviewers[:-1]
  536. if current_cl_info:
  537. cl_infos.append(current_cl_info)
  538. current_cl_info = CLInfo(reviewers=reviewers)
  539. continue
  540. if not current_cl_info:
  541. # Make sure no nonempty lines appear before the first CL
  542. raise ClSplitParseError(
  543. f"Error: Line appears before the first 'Reviewers: ' line:\n{line}"
  544. )
  545. # Description is just used as a description, so any string is fine
  546. m = re.fullmatch(description_re, line)
  547. if m:
  548. if current_cl_info.description:
  549. raise ClSplitParseError(
  550. f"Error parsing line: CL already has a description entry\n{line}"
  551. )
  552. current_cl_info.description = m.group(1).strip()
  553. continue
  554. # Any other line is presumed to be an '<action>, <file>' pair
  555. m = re.fullmatch(file_re, line)
  556. if m:
  557. action, path = m.groups()
  558. current_cl_info.files.append((action, path))
  559. continue
  560. raise ClSplitParseError("Error parsing line: Does not look like\n"
  561. "'Reviewers: [...]',\n"
  562. "'Description: ...', or\n"
  563. f"a pair of '<action>, <file>':\n{line}")
  564. if (current_cl_info):
  565. cl_infos.append(current_cl_info)
  566. return cl_infos
  567. def ValidateSplitting(cl_infos: List[CLInfo], filename: str,
  568. files_on_disk: List[Tuple[str, str]]):
  569. """
  570. Ensure that the provided list of CLs is a valid splitting.
  571. Specifically, check that:
  572. - Each file is in at most one CL
  573. - Each file and action appear in the list of changed files reported by git
  574. - Warn if some files don't appear in any CL
  575. - Warn if a reviewer string looks wrong, or if a CL is empty
  576. """
  577. # Validate the parsed information
  578. if not cl_infos:
  579. EmitWarning("No CLs listed in file. No action will be taken.")
  580. return []
  581. files_in_loaded_cls = set()
  582. # Collect all files, ensuring no duplicates
  583. # Warn on empty CLs or invalid reviewer strings
  584. for info in cl_infos:
  585. if not info.files:
  586. EmitWarning("CL has no files, and will be skipped:\n",
  587. info.FormatForPrinting())
  588. for file_info in info.files:
  589. if file_info in files_in_loaded_cls:
  590. raise ClSplitParseError(
  591. f"File appears in multiple CLs in {filename}:\n{file_info}")
  592. files_in_loaded_cls.add(file_info)
  593. for reviewer in info.reviewers:
  594. if not (re.fullmatch(r"[^@]+@[^.]+\..+", reviewer)):
  595. EmitWarning("reviewer does not look like an email address: ",
  596. reviewer)
  597. # Strip empty CLs
  598. cl_infos = [info for info in cl_infos if info.files]
  599. # Ensure the files in the user-provided CL splitting match the files
  600. # that git reports.
  601. # Warn if not all the files git reports appear.
  602. # Fail if the user mentions a file that isn't reported by git
  603. files_on_disk = set(files_on_disk)
  604. if not files_in_loaded_cls.issubset(files_on_disk):
  605. extra_files = files_in_loaded_cls.difference(files_on_disk)
  606. extra_files_str = "\n".join(f"{action}, {file}"
  607. for (action, file) in extra_files)
  608. raise ClSplitParseError(
  609. f"Some files are listed in {filename} but do not match any files "
  610. f"listed by git:\n{extra_files_str}")
  611. unmentioned_files = files_on_disk.difference(files_in_loaded_cls)
  612. if (unmentioned_files):
  613. EmitWarning(
  614. "the following files are not included in any CL in {filename}. "
  615. "They will not be uploaded:")
  616. for file in unmentioned_files:
  617. Emit(file)
  618. def LoadSplittingFromFile(filename: str,
  619. files_on_disk: List[Tuple[str, str]]) -> List[CLInfo]:
  620. """
  621. Given a file and the list of <action>, <file> pairs reported by git,
  622. read the file and return the list of CLInfos it contains.
  623. """
  624. lines = gclient_utils.FileRead(filename).splitlines()
  625. cl_infos = ParseSplittings(lines)
  626. ValidateSplitting(cl_infos, filename, files_on_disk)
  627. return cl_infos
  628. def EditSplittingInteractively(
  629. cl_infos: List[CLInfo],
  630. files_on_disk: List[Tuple[str, str]]) -> List[CLInfo]:
  631. """
  632. Allow the user to edit the generated splitting using their default editor.
  633. Make sure the edited splitting is saved so they can retrieve it if needed.
  634. """
  635. tmp_file = SaveSplittingToTempFile(cl_infos, silent=True)
  636. splitting = gclient_utils.RunEditor(gclient_utils.FileRead(tmp_file), False)
  637. cl_infos = ParseSplittings(splitting.splitlines())
  638. # Save the edited splitting before validation, so the user can go back
  639. # and edit it if there are any typos
  640. SaveSplittingToFile(cl_infos, tmp_file)
  641. ValidateSplitting(cl_infos, "the provided splitting", files_on_disk)
  642. return cl_infos, tmp_file