scm.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. # Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2. # Use of this source code is governed by a BSD-style license that can be
  3. # found in the LICENSE file.
  4. """SCM-specific utility classes."""
  5. import glob
  6. import io
  7. import os
  8. import platform
  9. import re
  10. import sys
  11. import gclient_utils
  12. import subprocess2
  13. # TODO: Should fix these warnings.
  14. # pylint: disable=line-too-long
  15. # constants used to identify the tree state of a directory.
  16. VERSIONED_NO = 0
  17. VERSIONED_DIR = 1
  18. VERSIONED_SUBMODULE = 2
  19. def ValidateEmail(email):
  20. return (re.match(r"^[a-zA-Z0-9._%\-+]+@[a-zA-Z0-9._%-]+.[a-zA-Z]{2,6}$",
  21. email) is not None)
  22. def GetCasedPath(path):
  23. """Elcheapos way to get the real path case on Windows."""
  24. if sys.platform.startswith('win') and os.path.exists(path):
  25. # Reconstruct the path.
  26. path = os.path.abspath(path)
  27. paths = path.split('\\')
  28. for i in range(len(paths)):
  29. if i == 0:
  30. # Skip drive letter.
  31. continue
  32. subpath = '\\'.join(paths[:i + 1])
  33. prev = len('\\'.join(paths[:i]))
  34. # glob.glob will return the cased path for the last item only. This
  35. # is why we are calling it in a loop. Extract the data we want and
  36. # put it back into the list.
  37. paths[i] = glob.glob(subpath + '*')[0][prev + 1:len(subpath)]
  38. path = '\\'.join(paths)
  39. return path
  40. def GenFakeDiff(filename):
  41. """Generates a fake diff from a file."""
  42. file_content = gclient_utils.FileRead(filename, 'rb').splitlines(True)
  43. filename = filename.replace(os.sep, '/')
  44. nb_lines = len(file_content)
  45. # We need to use / since patch on unix will fail otherwise.
  46. data = io.StringIO()
  47. data.write("Index: %s\n" % filename)
  48. data.write('=' * 67 + '\n')
  49. # Note: Should we use /dev/null instead?
  50. data.write("--- %s\n" % filename)
  51. data.write("+++ %s\n" % filename)
  52. data.write("@@ -0,0 +1,%d @@\n" % nb_lines)
  53. # Prepend '+' to every lines.
  54. for line in file_content:
  55. data.write('+')
  56. data.write(line)
  57. result = data.getvalue()
  58. data.close()
  59. return result
  60. def determine_scm(root):
  61. """Similar to upload.py's version but much simpler.
  62. Returns 'git' or None.
  63. """
  64. if os.path.isdir(os.path.join(root, '.git')):
  65. return 'git'
  66. try:
  67. subprocess2.check_call(['git', 'rev-parse', '--show-cdup'],
  68. stdout=subprocess2.DEVNULL,
  69. stderr=subprocess2.DEVNULL,
  70. cwd=root)
  71. return 'git'
  72. except (OSError, subprocess2.CalledProcessError):
  73. return None
  74. def only_int(val):
  75. if val.isdigit():
  76. return int(val)
  77. return 0
  78. class GIT(object):
  79. current_version = None
  80. rev_parse_cache = {}
  81. @staticmethod
  82. def ApplyEnvVars(kwargs):
  83. env = kwargs.pop('env', None) or os.environ.copy()
  84. # Don't prompt for passwords; just fail quickly and noisily.
  85. # By default, git will use an interactive terminal prompt when a
  86. # username/ password is needed. That shouldn't happen in the chromium
  87. # workflow, and if it does, then gclient may hide the prompt in the
  88. # midst of a flood of terminal spew. The only indication that something
  89. # has gone wrong will be when gclient hangs unresponsively. Instead, we
  90. # disable the password prompt and simply allow git to fail noisily. The
  91. # error message produced by git will be copied to gclient's output.
  92. env.setdefault('GIT_ASKPASS', 'true')
  93. env.setdefault('SSH_ASKPASS', 'true')
  94. # 'cat' is a magical git string that disables pagers on all platforms.
  95. env.setdefault('GIT_PAGER', 'cat')
  96. return env
  97. @staticmethod
  98. def Capture(args, cwd=None, strip_out=True, **kwargs):
  99. env = GIT.ApplyEnvVars(kwargs)
  100. output = subprocess2.check_output(['git'] + args,
  101. cwd=cwd,
  102. stderr=subprocess2.PIPE,
  103. env=env,
  104. **kwargs)
  105. output = output.decode('utf-8', 'replace')
  106. return output.strip() if strip_out else output
  107. @staticmethod
  108. def CaptureStatus(cwd, upstream_branch, end_commit=None):
  109. # type: (str, str, Optional[str]) -> Sequence[Tuple[str, str]]
  110. """Returns git status.
  111. Returns an array of (status, file) tuples."""
  112. if end_commit is None:
  113. end_commit = ''
  114. if upstream_branch is None:
  115. upstream_branch = GIT.GetUpstreamBranch(cwd)
  116. if upstream_branch is None:
  117. raise gclient_utils.Error('Cannot determine upstream branch')
  118. command = [
  119. '-c', 'core.quotePath=false', 'diff', '--name-status',
  120. '--no-renames', '--ignore-submodules=all', '-r',
  121. '%s...%s' % (upstream_branch, end_commit)
  122. ]
  123. status = GIT.Capture(command, cwd)
  124. results = []
  125. if status:
  126. for statusline in status.splitlines():
  127. # 3-way merges can cause the status can be 'MMM' instead of 'M'.
  128. # This can happen when the user has 2 local branches and he
  129. # diffs between these 2 branches instead diffing to upstream.
  130. m = re.match(r'^(\w)+\t(.+)$', statusline)
  131. if not m:
  132. raise gclient_utils.Error(
  133. 'status currently unsupported: %s' % statusline)
  134. # Only grab the first letter.
  135. results.append(('%s ' % m.group(1)[0], m.group(2)))
  136. return results
  137. @staticmethod
  138. def GetConfig(cwd, key, default=None):
  139. try:
  140. return GIT.Capture(['config', key], cwd=cwd)
  141. except subprocess2.CalledProcessError:
  142. return default
  143. @staticmethod
  144. def GetBranchConfig(cwd, branch, key, default=None):
  145. assert branch, 'A branch must be given'
  146. key = 'branch.%s.%s' % (branch, key)
  147. return GIT.GetConfig(cwd, key, default)
  148. @staticmethod
  149. def SetConfig(cwd, key, value=None):
  150. if value is None:
  151. args = ['config', '--unset', key]
  152. else:
  153. args = ['config', key, value]
  154. GIT.Capture(args, cwd=cwd)
  155. @staticmethod
  156. def SetBranchConfig(cwd, branch, key, value=None):
  157. assert branch, 'A branch must be given'
  158. key = 'branch.%s.%s' % (branch, key)
  159. GIT.SetConfig(cwd, key, value)
  160. @staticmethod
  161. def IsWorkTreeDirty(cwd):
  162. return GIT.Capture(['status', '-s'], cwd=cwd) != ''
  163. @staticmethod
  164. def GetEmail(cwd):
  165. """Retrieves the user email address if known."""
  166. return GIT.GetConfig(cwd, 'user.email', '')
  167. @staticmethod
  168. def ShortBranchName(branch):
  169. """Converts a name like 'refs/heads/foo' to just 'foo'."""
  170. return branch.replace('refs/heads/', '')
  171. @staticmethod
  172. def GetBranchRef(cwd):
  173. """Returns the full branch reference, e.g. 'refs/heads/main'."""
  174. try:
  175. return GIT.Capture(['symbolic-ref', 'HEAD'], cwd=cwd)
  176. except subprocess2.CalledProcessError:
  177. return None
  178. @staticmethod
  179. def GetRemoteHeadRef(cwd, url, remote):
  180. """Returns the full default remote branch reference, e.g.
  181. 'refs/remotes/origin/main'."""
  182. if os.path.exists(cwd):
  183. try:
  184. # Try using local git copy first
  185. ref = 'refs/remotes/%s/HEAD' % remote
  186. ref = GIT.Capture(['symbolic-ref', ref], cwd=cwd)
  187. if not ref.endswith('master'):
  188. return ref
  189. # Check if there are changes in the default branch for this
  190. # particular repository.
  191. GIT.Capture(['remote', 'set-head', '-a', remote], cwd=cwd)
  192. return GIT.Capture(['symbolic-ref', ref], cwd=cwd)
  193. except subprocess2.CalledProcessError:
  194. pass
  195. try:
  196. # Fetch information from git server
  197. resp = GIT.Capture(['ls-remote', '--symref', url, 'HEAD'])
  198. regex = r'^ref: (.*)\tHEAD$'
  199. for line in resp.split('\n'):
  200. m = re.match(regex, line)
  201. if m:
  202. return ''.join(GIT.RefToRemoteRef(m.group(1), remote))
  203. except subprocess2.CalledProcessError:
  204. pass
  205. # Return default branch
  206. return 'refs/remotes/%s/main' % remote
  207. @staticmethod
  208. def GetBranch(cwd):
  209. """Returns the short branch name, e.g. 'main'."""
  210. branchref = GIT.GetBranchRef(cwd)
  211. if branchref:
  212. return GIT.ShortBranchName(branchref)
  213. return None
  214. @staticmethod
  215. def GetRemoteBranches(cwd):
  216. return GIT.Capture(['branch', '-r'], cwd=cwd).split()
  217. @staticmethod
  218. def FetchUpstreamTuple(cwd, branch=None):
  219. """Returns a tuple containing remote and remote ref,
  220. e.g. 'origin', 'refs/heads/main'
  221. """
  222. try:
  223. branch = branch or GIT.GetBranch(cwd)
  224. except subprocess2.CalledProcessError:
  225. pass
  226. if branch:
  227. upstream_branch = GIT.GetBranchConfig(cwd, branch, 'merge')
  228. if upstream_branch:
  229. remote = GIT.GetBranchConfig(cwd, branch, 'remote', '.')
  230. return remote, upstream_branch
  231. upstream_branch = GIT.GetConfig(cwd, 'rietveld.upstream-branch')
  232. if upstream_branch:
  233. remote = GIT.GetConfig(cwd, 'rietveld.upstream-remote', '.')
  234. return remote, upstream_branch
  235. # Else, try to guess the origin remote.
  236. remote_branches = GIT.GetRemoteBranches(cwd)
  237. if 'origin/main' in remote_branches:
  238. # Fall back on origin/main if it exits.
  239. return 'origin', 'refs/heads/main'
  240. if 'origin/master' in remote_branches:
  241. # Fall back on origin/master if it exits.
  242. return 'origin', 'refs/heads/master'
  243. return None, None
  244. @staticmethod
  245. def RefToRemoteRef(ref, remote):
  246. """Convert a checkout ref to the equivalent remote ref.
  247. Returns:
  248. A tuple of the remote ref's (common prefix, unique suffix), or None if it
  249. doesn't appear to refer to a remote ref (e.g. it's a commit hash).
  250. """
  251. # TODO(mmoss): This is just a brute-force mapping based of the expected
  252. # git config. It's a bit better than the even more brute-force
  253. # replace('heads', ...), but could still be smarter (like maybe actually
  254. # using values gleaned from the git config).
  255. m = re.match('^(refs/(remotes/)?)?branch-heads/', ref or '')
  256. if m:
  257. return ('refs/remotes/branch-heads/', ref.replace(m.group(0), ''))
  258. m = re.match('^((refs/)?remotes/)?%s/|(refs/)?heads/' % remote, ref
  259. or '')
  260. if m:
  261. return ('refs/remotes/%s/' % remote, ref.replace(m.group(0), ''))
  262. return None
  263. @staticmethod
  264. def RemoteRefToRef(ref, remote):
  265. assert remote, 'A remote must be given'
  266. if not ref or not ref.startswith('refs/'):
  267. return None
  268. if not ref.startswith('refs/remotes/'):
  269. return ref
  270. if ref.startswith('refs/remotes/branch-heads/'):
  271. return 'refs' + ref[len('refs/remotes'):]
  272. if ref.startswith('refs/remotes/%s/' % remote):
  273. return 'refs/heads' + ref[len('refs/remotes/%s' % remote):]
  274. return None
  275. @staticmethod
  276. def GetUpstreamBranch(cwd):
  277. """Gets the current branch's upstream branch."""
  278. remote, upstream_branch = GIT.FetchUpstreamTuple(cwd)
  279. if remote != '.' and upstream_branch:
  280. remote_ref = GIT.RefToRemoteRef(upstream_branch, remote)
  281. if remote_ref:
  282. upstream_branch = ''.join(remote_ref)
  283. return upstream_branch
  284. @staticmethod
  285. def IsAncestor(maybe_ancestor, ref, cwd=None):
  286. # type: (string, string, Optional[string]) -> bool
  287. """Verifies if |maybe_ancestor| is an ancestor of |ref|."""
  288. try:
  289. GIT.Capture(['merge-base', '--is-ancestor', maybe_ancestor, ref],
  290. cwd=cwd)
  291. return True
  292. except subprocess2.CalledProcessError:
  293. return False
  294. @staticmethod
  295. def GetOldContents(cwd, filename, branch=None):
  296. if not branch:
  297. branch = GIT.GetUpstreamBranch(cwd)
  298. if platform.system() == 'Windows':
  299. # git show <sha>:<path> wants a posix path.
  300. filename = filename.replace('\\', '/')
  301. command = ['show', '%s:%s' % (branch, filename)]
  302. try:
  303. return GIT.Capture(command, cwd=cwd, strip_out=False)
  304. except subprocess2.CalledProcessError:
  305. return ''
  306. @staticmethod
  307. def GenerateDiff(cwd,
  308. branch=None,
  309. branch_head='HEAD',
  310. full_move=False,
  311. files=None):
  312. """Diffs against the upstream branch or optionally another branch.
  313. full_move means that move or copy operations should completely recreate the
  314. files, usually in the prospect to apply the patch for a try job."""
  315. if not branch:
  316. branch = GIT.GetUpstreamBranch(cwd)
  317. command = [
  318. '-c', 'core.quotePath=false', 'diff', '-p', '--no-color',
  319. '--no-prefix', '--no-ext-diff', branch + "..." + branch_head
  320. ]
  321. if full_move:
  322. command.append('--no-renames')
  323. else:
  324. command.append('-C')
  325. # TODO(maruel): --binary support.
  326. if files:
  327. command.append('--')
  328. command.extend(files)
  329. diff = GIT.Capture(command, cwd=cwd, strip_out=False).splitlines(True)
  330. for i in range(len(diff)):
  331. # In the case of added files, replace /dev/null with the path to the
  332. # file being added.
  333. if diff[i].startswith('--- /dev/null'):
  334. diff[i] = '--- %s' % diff[i + 1][4:]
  335. return ''.join(diff)
  336. @staticmethod
  337. def GetDifferentFiles(cwd, branch=None, branch_head='HEAD'):
  338. """Returns the list of modified files between two branches."""
  339. if not branch:
  340. branch = GIT.GetUpstreamBranch(cwd)
  341. command = [
  342. '-c', 'core.quotePath=false', 'diff', '--name-only',
  343. branch + "..." + branch_head
  344. ]
  345. return GIT.Capture(command, cwd=cwd).splitlines(False)
  346. @staticmethod
  347. def GetAllFiles(cwd):
  348. """Returns the list of all files under revision control."""
  349. command = ['-c', 'core.quotePath=false', 'ls-files', '-s', '--', '.']
  350. files = GIT.Capture(command, cwd=cwd).splitlines(False)
  351. # return only files
  352. return [f.split(maxsplit=3)[-1] for f in files if f.startswith('100')]
  353. @staticmethod
  354. def GetSubmoduleCommits(cwd, submodules):
  355. # type: (string, List[string]) => Mapping[string][string]
  356. """Returns a mapping of staged or committed new commits for submodules."""
  357. if not submodules:
  358. return {}
  359. result = subprocess2.check_output(['git', 'ls-files', '-s', '--'] +
  360. submodules,
  361. cwd=cwd).decode('utf-8')
  362. commit_hashes = {}
  363. for r in result.splitlines():
  364. # ['<mode>', '<commit_hash>', '<stage_number>', '<path>'].
  365. record = r.strip().split(maxsplit=3) # path can contain spaces.
  366. assert record[0] == '160000', 'file is not a gitlink: %s' % record
  367. commit_hashes[record[3]] = record[1]
  368. return commit_hashes
  369. @staticmethod
  370. def GetPatchName(cwd):
  371. """Constructs a name for this patch."""
  372. short_sha = GIT.Capture(['rev-parse', '--short=4', 'HEAD'], cwd=cwd)
  373. return "%s#%s" % (GIT.GetBranch(cwd), short_sha)
  374. @staticmethod
  375. def GetCheckoutRoot(cwd):
  376. """Returns the top level directory of a git checkout as an absolute path.
  377. """
  378. root = GIT.Capture(['rev-parse', '--show-cdup'], cwd=cwd)
  379. return os.path.abspath(os.path.join(cwd, root))
  380. @staticmethod
  381. def GetGitDir(cwd):
  382. return os.path.abspath(GIT.Capture(['rev-parse', '--git-dir'], cwd=cwd))
  383. @staticmethod
  384. def IsInsideWorkTree(cwd):
  385. try:
  386. return GIT.Capture(['rev-parse', '--is-inside-work-tree'], cwd=cwd)
  387. except (OSError, subprocess2.CalledProcessError):
  388. return False
  389. @staticmethod
  390. def IsVersioned(cwd, relative_dir):
  391. # type: (str, str) -> int
  392. """Checks whether the given |relative_dir| is part of cwd's repo."""
  393. output = GIT.Capture(['ls-tree', 'HEAD', '--', relative_dir], cwd=cwd)
  394. if not output:
  395. return VERSIONED_NO
  396. if output.startswith('160000'):
  397. return VERSIONED_SUBMODULE
  398. return VERSIONED_DIR
  399. @staticmethod
  400. def CleanupDir(cwd, relative_dir):
  401. """Cleans up untracked file inside |relative_dir|."""
  402. return bool(GIT.Capture(['clean', '-df', relative_dir], cwd=cwd))
  403. @staticmethod
  404. def ResolveCommit(cwd, rev):
  405. cache_key = None
  406. # We do this instead of rev-parse --verify rev^{commit}, since on
  407. # Windows git can be either an executable or batch script, each of which
  408. # requires escaping the caret (^) a different way.
  409. if gclient_utils.IsFullGitSha(rev):
  410. # Only cache full SHAs
  411. cache_key = hash(cwd + rev)
  412. if val := GIT.rev_parse_cache.get(cache_key):
  413. return val
  414. # git-rev parse --verify FULL_GIT_SHA always succeeds, even if we
  415. # don't have FULL_GIT_SHA locally. Removing the last character
  416. # forces git to check if FULL_GIT_SHA refers to an object in the
  417. # local database.
  418. rev = rev[:-1]
  419. res = GIT.Capture(['rev-parse', '--quiet', '--verify', rev], cwd=cwd)
  420. if cache_key:
  421. # We don't expect concurrent execution, so we don't lock anything.
  422. GIT.rev_parse_cache[cache_key] = res
  423. return res
  424. @staticmethod
  425. def IsValidRevision(cwd, rev, sha_only=False):
  426. """Verifies the revision is a proper git revision.
  427. sha_only: Fail unless rev is a sha hash.
  428. """
  429. try:
  430. sha = GIT.ResolveCommit(cwd, rev)
  431. except subprocess2.CalledProcessError:
  432. return None
  433. if sha_only:
  434. return sha == rev.lower()
  435. return True