scm.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. # Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2. # Use of this source code is governed by a BSD-style license that can be
  3. # found in the LICENSE file.
  4. """SCM-specific utility classes."""
  5. import distutils.version
  6. import glob
  7. import io
  8. import os
  9. import platform
  10. import re
  11. import sys
  12. import gclient_utils
  13. import subprocess2
  14. def ValidateEmail(email):
  15. return (re.match(r"^[a-zA-Z0-9._%\-+]+@[a-zA-Z0-9._%-]+.[a-zA-Z]{2,6}$",
  16. email) is not None)
  17. def GetCasedPath(path):
  18. """Elcheapos way to get the real path case on Windows."""
  19. if sys.platform.startswith('win') and os.path.exists(path):
  20. # Reconstruct the path.
  21. path = os.path.abspath(path)
  22. paths = path.split('\\')
  23. for i in range(len(paths)):
  24. if i == 0:
  25. # Skip drive letter.
  26. continue
  27. subpath = '\\'.join(paths[:i + 1])
  28. prev = len('\\'.join(paths[:i]))
  29. # glob.glob will return the cased path for the last item only. This is why
  30. # we are calling it in a loop. Extract the data we want and put it back
  31. # into the list.
  32. paths[i] = glob.glob(subpath + '*')[0][prev + 1:len(subpath)]
  33. path = '\\'.join(paths)
  34. return path
  35. def GenFakeDiff(filename):
  36. """Generates a fake diff from a file."""
  37. file_content = gclient_utils.FileRead(filename, 'rb').splitlines(True)
  38. filename = filename.replace(os.sep, '/')
  39. nb_lines = len(file_content)
  40. # We need to use / since patch on unix will fail otherwise.
  41. data = io.StringIO()
  42. data.write("Index: %s\n" % filename)
  43. data.write('=' * 67 + '\n')
  44. # Note: Should we use /dev/null instead?
  45. data.write("--- %s\n" % filename)
  46. data.write("+++ %s\n" % filename)
  47. data.write("@@ -0,0 +1,%d @@\n" % nb_lines)
  48. # Prepend '+' to every lines.
  49. for line in file_content:
  50. data.write('+')
  51. data.write(line)
  52. result = data.getvalue()
  53. data.close()
  54. return result
  55. def determine_scm(root):
  56. """Similar to upload.py's version but much simpler.
  57. Returns 'git' or None.
  58. """
  59. if os.path.isdir(os.path.join(root, '.git')):
  60. return 'git'
  61. try:
  62. subprocess2.check_call(['git', 'rev-parse', '--show-cdup'],
  63. stdout=subprocess2.DEVNULL,
  64. stderr=subprocess2.DEVNULL,
  65. cwd=root)
  66. return 'git'
  67. except (OSError, subprocess2.CalledProcessError):
  68. return None
  69. def only_int(val):
  70. if val.isdigit():
  71. return int(val)
  72. return 0
  73. class GIT(object):
  74. current_version = None
  75. @staticmethod
  76. def ApplyEnvVars(kwargs):
  77. env = kwargs.pop('env', None) or os.environ.copy()
  78. # Don't prompt for passwords; just fail quickly and noisily.
  79. # By default, git will use an interactive terminal prompt when a username/
  80. # password is needed. That shouldn't happen in the chromium workflow,
  81. # and if it does, then gclient may hide the prompt in the midst of a flood
  82. # of terminal spew. The only indication that something has gone wrong
  83. # will be when gclient hangs unresponsively. Instead, we disable the
  84. # password prompt and simply allow git to fail noisily. The error
  85. # message produced by git will be copied to gclient's output.
  86. env.setdefault('GIT_ASKPASS', 'true')
  87. env.setdefault('SSH_ASKPASS', 'true')
  88. # 'cat' is a magical git string that disables pagers on all platforms.
  89. env.setdefault('GIT_PAGER', 'cat')
  90. return env
  91. @staticmethod
  92. def Capture(args, cwd=None, strip_out=True, **kwargs):
  93. env = GIT.ApplyEnvVars(kwargs)
  94. output = subprocess2.check_output(['git'] + args,
  95. cwd=cwd,
  96. stderr=subprocess2.PIPE,
  97. env=env,
  98. **kwargs)
  99. output = output.decode('utf-8', 'replace')
  100. return output.strip() if strip_out else output
  101. @staticmethod
  102. def CaptureStatus(cwd, upstream_branch, end_commit=None):
  103. # type: (str, str, Optional[str]) -> Sequence[Tuple[str, str]]
  104. """Returns git status.
  105. Returns an array of (status, file) tuples."""
  106. if end_commit is None:
  107. end_commit = ''
  108. if upstream_branch is None:
  109. upstream_branch = GIT.GetUpstreamBranch(cwd)
  110. if upstream_branch is None:
  111. raise gclient_utils.Error('Cannot determine upstream branch')
  112. command = [
  113. '-c', 'core.quotePath=false', 'diff', '--name-status', '--no-renames',
  114. '-r',
  115. '%s...%s' % (upstream_branch, end_commit)
  116. ]
  117. status = GIT.Capture(command, cwd)
  118. results = []
  119. if status:
  120. for statusline in status.splitlines():
  121. # 3-way merges can cause the status can be 'MMM' instead of 'M'. This
  122. # can happen when the user has 2 local branches and he diffs between
  123. # these 2 branches instead diffing to upstream.
  124. m = re.match(r'^(\w)+\t(.+)$', statusline)
  125. if not m:
  126. raise gclient_utils.Error('status currently unsupported: %s' %
  127. statusline)
  128. # Only grab the first letter.
  129. results.append(('%s ' % m.group(1)[0], m.group(2)))
  130. return results
  131. @staticmethod
  132. def GetConfig(cwd, key, default=None):
  133. try:
  134. return GIT.Capture(['config', key], cwd=cwd)
  135. except subprocess2.CalledProcessError:
  136. return default
  137. @staticmethod
  138. def GetBranchConfig(cwd, branch, key, default=None):
  139. assert branch, 'A branch must be given'
  140. key = 'branch.%s.%s' % (branch, key)
  141. return GIT.GetConfig(cwd, key, default)
  142. @staticmethod
  143. def SetConfig(cwd, key, value=None):
  144. if value is None:
  145. args = ['config', '--unset', key]
  146. else:
  147. args = ['config', key, value]
  148. GIT.Capture(args, cwd=cwd)
  149. @staticmethod
  150. def SetBranchConfig(cwd, branch, key, value=None):
  151. assert branch, 'A branch must be given'
  152. key = 'branch.%s.%s' % (branch, key)
  153. GIT.SetConfig(cwd, key, value)
  154. @staticmethod
  155. def IsWorkTreeDirty(cwd):
  156. return GIT.Capture(['status', '-s'], cwd=cwd) != ''
  157. @staticmethod
  158. def GetEmail(cwd):
  159. """Retrieves the user email address if known."""
  160. return GIT.GetConfig(cwd, 'user.email', '')
  161. @staticmethod
  162. def ShortBranchName(branch):
  163. """Converts a name like 'refs/heads/foo' to just 'foo'."""
  164. return branch.replace('refs/heads/', '')
  165. @staticmethod
  166. def GetBranchRef(cwd):
  167. """Returns the full branch reference, e.g. 'refs/heads/main'."""
  168. try:
  169. return GIT.Capture(['symbolic-ref', 'HEAD'], cwd=cwd)
  170. except subprocess2.CalledProcessError:
  171. return None
  172. @staticmethod
  173. def GetRemoteHeadRef(cwd, url, remote):
  174. """Returns the full default remote branch reference, e.g.
  175. 'refs/remotes/origin/main'."""
  176. if os.path.exists(cwd):
  177. try:
  178. # Try using local git copy first
  179. ref = 'refs/remotes/%s/HEAD' % remote
  180. ref = GIT.Capture(['symbolic-ref', ref], cwd=cwd)
  181. if not ref.endswith('master'):
  182. return ref
  183. # Check if there are changes in the default branch for this particular
  184. # repository.
  185. GIT.Capture(['remote', 'set-head', '-a', remote], cwd=cwd)
  186. return GIT.Capture(['symbolic-ref', ref], cwd=cwd)
  187. except subprocess2.CalledProcessError:
  188. pass
  189. try:
  190. # Fetch information from git server
  191. resp = GIT.Capture(['ls-remote', '--symref', url, 'HEAD'])
  192. regex = r'^ref: (.*)\tHEAD$'
  193. for line in resp.split('\n'):
  194. m = re.match(regex, line)
  195. if m:
  196. return ''.join(GIT.RefToRemoteRef(m.group(1), remote))
  197. except subprocess2.CalledProcessError:
  198. pass
  199. # Return default branch
  200. return 'refs/remotes/%s/main' % remote
  201. @staticmethod
  202. def GetBranch(cwd):
  203. """Returns the short branch name, e.g. 'main'."""
  204. branchref = GIT.GetBranchRef(cwd)
  205. if branchref:
  206. return GIT.ShortBranchName(branchref)
  207. return None
  208. @staticmethod
  209. def GetRemoteBranches(cwd):
  210. return GIT.Capture(['branch', '-r'], cwd=cwd).split()
  211. @staticmethod
  212. def FetchUpstreamTuple(cwd, branch=None):
  213. """Returns a tuple containing remote and remote ref,
  214. e.g. 'origin', 'refs/heads/main'
  215. """
  216. try:
  217. branch = branch or GIT.GetBranch(cwd)
  218. except subprocess2.CalledProcessError:
  219. pass
  220. if branch:
  221. upstream_branch = GIT.GetBranchConfig(cwd, branch, 'merge')
  222. if upstream_branch:
  223. remote = GIT.GetBranchConfig(cwd, branch, 'remote', '.')
  224. return remote, upstream_branch
  225. upstream_branch = GIT.GetConfig(cwd, 'rietveld.upstream-branch')
  226. if upstream_branch:
  227. remote = GIT.GetConfig(cwd, 'rietveld.upstream-remote', '.')
  228. return remote, upstream_branch
  229. # Else, try to guess the origin remote.
  230. remote_branches = GIT.GetRemoteBranches(cwd)
  231. if 'origin/main' in remote_branches:
  232. # Fall back on origin/main if it exits.
  233. return 'origin', 'refs/heads/main'
  234. if 'origin/master' in remote_branches:
  235. # Fall back on origin/master if it exits.
  236. return 'origin', 'refs/heads/master'
  237. return None, None
  238. @staticmethod
  239. def RefToRemoteRef(ref, remote):
  240. """Convert a checkout ref to the equivalent remote ref.
  241. Returns:
  242. A tuple of the remote ref's (common prefix, unique suffix), or None if it
  243. doesn't appear to refer to a remote ref (e.g. it's a commit hash).
  244. """
  245. # TODO(mmoss): This is just a brute-force mapping based of the expected git
  246. # config. It's a bit better than the even more brute-force replace('heads',
  247. # ...), but could still be smarter (like maybe actually using values gleaned
  248. # from the git config).
  249. m = re.match('^(refs/(remotes/)?)?branch-heads/', ref or '')
  250. if m:
  251. return ('refs/remotes/branch-heads/', ref.replace(m.group(0), ''))
  252. m = re.match('^((refs/)?remotes/)?%s/|(refs/)?heads/' % remote, ref or '')
  253. if m:
  254. return ('refs/remotes/%s/' % remote, ref.replace(m.group(0), ''))
  255. return None
  256. @staticmethod
  257. def RemoteRefToRef(ref, remote):
  258. assert remote, 'A remote must be given'
  259. if not ref or not ref.startswith('refs/'):
  260. return None
  261. if not ref.startswith('refs/remotes/'):
  262. return ref
  263. if ref.startswith('refs/remotes/branch-heads/'):
  264. return 'refs' + ref[len('refs/remotes'):]
  265. if ref.startswith('refs/remotes/%s/' % remote):
  266. return 'refs/heads' + ref[len('refs/remotes/%s' % remote):]
  267. return None
  268. @staticmethod
  269. def GetUpstreamBranch(cwd):
  270. """Gets the current branch's upstream branch."""
  271. remote, upstream_branch = GIT.FetchUpstreamTuple(cwd)
  272. if remote != '.' and upstream_branch:
  273. remote_ref = GIT.RefToRemoteRef(upstream_branch, remote)
  274. if remote_ref:
  275. upstream_branch = ''.join(remote_ref)
  276. return upstream_branch
  277. @staticmethod
  278. def IsAncestor(maybe_ancestor, ref, cwd=None):
  279. # type: (string, string, Optional[string]) -> bool
  280. """Verifies if |maybe_ancestor| is an ancestor of |ref|."""
  281. try:
  282. GIT.Capture(['merge-base', '--is-ancestor', maybe_ancestor, ref], cwd=cwd)
  283. return True
  284. except subprocess2.CalledProcessError:
  285. return False
  286. @staticmethod
  287. def GetOldContents(cwd, filename, branch=None):
  288. if not branch:
  289. branch = GIT.GetUpstreamBranch(cwd)
  290. if platform.system() == 'Windows':
  291. # git show <sha>:<path> wants a posix path.
  292. filename = filename.replace('\\', '/')
  293. command = ['show', '%s:%s' % (branch, filename)]
  294. try:
  295. return GIT.Capture(command, cwd=cwd, strip_out=False)
  296. except subprocess2.CalledProcessError:
  297. return ''
  298. @staticmethod
  299. def GenerateDiff(cwd,
  300. branch=None,
  301. branch_head='HEAD',
  302. full_move=False,
  303. files=None):
  304. """Diffs against the upstream branch or optionally another branch.
  305. full_move means that move or copy operations should completely recreate the
  306. files, usually in the prospect to apply the patch for a try job."""
  307. if not branch:
  308. branch = GIT.GetUpstreamBranch(cwd)
  309. command = [
  310. '-c', 'core.quotePath=false', 'diff', '-p', '--no-color', '--no-prefix',
  311. '--no-ext-diff', branch + "..." + branch_head
  312. ]
  313. if full_move:
  314. command.append('--no-renames')
  315. else:
  316. command.append('-C')
  317. # TODO(maruel): --binary support.
  318. if files:
  319. command.append('--')
  320. command.extend(files)
  321. diff = GIT.Capture(command, cwd=cwd, strip_out=False).splitlines(True)
  322. for i in range(len(diff)):
  323. # In the case of added files, replace /dev/null with the path to the
  324. # file being added.
  325. if diff[i].startswith('--- /dev/null'):
  326. diff[i] = '--- %s' % diff[i + 1][4:]
  327. return ''.join(diff)
  328. @staticmethod
  329. def GetDifferentFiles(cwd, branch=None, branch_head='HEAD'):
  330. """Returns the list of modified files between two branches."""
  331. if not branch:
  332. branch = GIT.GetUpstreamBranch(cwd)
  333. command = [
  334. '-c', 'core.quotePath=false', 'diff', '--name-only',
  335. branch + "..." + branch_head
  336. ]
  337. return GIT.Capture(command, cwd=cwd).splitlines(False)
  338. @staticmethod
  339. def GetAllFiles(cwd):
  340. """Returns the list of all files under revision control."""
  341. command = ['-c', 'core.quotePath=false', 'ls-files', '--', '.']
  342. return GIT.Capture(command, cwd=cwd).splitlines(False)
  343. @staticmethod
  344. def GetPatchName(cwd):
  345. """Constructs a name for this patch."""
  346. short_sha = GIT.Capture(['rev-parse', '--short=4', 'HEAD'], cwd=cwd)
  347. return "%s#%s" % (GIT.GetBranch(cwd), short_sha)
  348. @staticmethod
  349. def GetCheckoutRoot(cwd):
  350. """Returns the top level directory of a git checkout as an absolute path.
  351. """
  352. root = GIT.Capture(['rev-parse', '--show-cdup'], cwd=cwd)
  353. return os.path.abspath(os.path.join(cwd, root))
  354. @staticmethod
  355. def GetGitDir(cwd):
  356. return os.path.abspath(GIT.Capture(['rev-parse', '--git-dir'], cwd=cwd))
  357. @staticmethod
  358. def IsInsideWorkTree(cwd):
  359. try:
  360. return GIT.Capture(['rev-parse', '--is-inside-work-tree'], cwd=cwd)
  361. except (OSError, subprocess2.CalledProcessError):
  362. return False
  363. @staticmethod
  364. def IsDirectoryVersioned(cwd, relative_dir):
  365. """Checks whether the given |relative_dir| is part of cwd's repo."""
  366. return bool(GIT.Capture(['ls-tree', 'HEAD', relative_dir], cwd=cwd))
  367. @staticmethod
  368. def CleanupDir(cwd, relative_dir):
  369. """Cleans up untracked file inside |relative_dir|."""
  370. return bool(GIT.Capture(['clean', '-df', relative_dir], cwd=cwd))
  371. @staticmethod
  372. def ResolveCommit(cwd, rev):
  373. # We do this instead of rev-parse --verify rev^{commit}, since on Windows
  374. # git can be either an executable or batch script, each of which requires
  375. # escaping the caret (^) a different way.
  376. if gclient_utils.IsFullGitSha(rev):
  377. # git-rev parse --verify FULL_GIT_SHA always succeeds, even if we don't
  378. # have FULL_GIT_SHA locally. Removing the last character forces git to
  379. # check if FULL_GIT_SHA refers to an object in the local database.
  380. rev = rev[:-1]
  381. try:
  382. return GIT.Capture(['rev-parse', '--quiet', '--verify', rev], cwd=cwd)
  383. except subprocess2.CalledProcessError:
  384. return None
  385. @staticmethod
  386. def IsValidRevision(cwd, rev, sha_only=False):
  387. """Verifies the revision is a proper git revision.
  388. sha_only: Fail unless rev is a sha hash.
  389. """
  390. sha = GIT.ResolveCommit(cwd, rev)
  391. if sha is None:
  392. return False
  393. if sha_only:
  394. return sha == rev.lower()
  395. return True
  396. @classmethod
  397. def AssertVersion(cls, min_version):
  398. """Asserts git's version is at least min_version."""
  399. if cls.current_version is None:
  400. current_version = cls.Capture(['--version'], '.')
  401. matched = re.search(r'git version (.+)', current_version)
  402. cls.current_version = distutils.version.LooseVersion(matched.group(1))
  403. min_version = distutils.version.LooseVersion(min_version)
  404. return (min_version <= cls.current_version, cls.current_version)