#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A git command for managing a local cache of git repositories."""

from __future__ import print_function

import contextlib
import errno
import logging
import optparse
import os
import re
import subprocess
import sys
import tempfile
import threading
import time

try:
  import urlparse
except ImportError:  # For Py3 compatibility
  import urllib.parse as urlparse

from download_from_google_storage import Gsutil
import gclient_utils
import subcommand

# Analogous to gc.autopacklimit git config.
GC_AUTOPACKLIMIT = 50

GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    pass


class LockError(Exception):
  pass


class ClobberNeeded(Exception):
  pass


def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string
        upon failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.
  """
  printerr = printerr or logging.warning
  for i in range(count):
    try:
      return fn()
    except excs as e:
      if (i + 1) >= count:
        raise
      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i + 1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
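
# A minimal usage sketch (illustrative; `flaky_network_call` is a
# hypothetical callable, not part of this module):
#
#   result = exponential_backoff_retry(
#       flaky_network_call, excs=(IOError,), name='network call', count=5)
#
# With the defaults (count=10, sleep_time=0.25), the nine sleeps before the
# final attempt total 0.25 * (2**9 - 1) = 127.75 seconds in the worst case.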


class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile."""

  def __init__(self, path, timeout=0):
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid."""
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    f = os.fdopen(fd, 'w')
    print(self.pid, file=f)
    f.close()

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          # Sleep for the remaining timeout, but at most 10 seconds per
          # attempt.
          sleep_time = min(10, self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock."""
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else."""
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
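
# A typical locking pattern around a cache mutation (sketch; the path is
# hypothetical):
#
#   lockfile = Lockfile('/path/to/cache/some-repo', timeout=30)
#   lockfile.lock()
#   try:
#     ...  # mutate the cache directory
#   finally:
#     lockfile.unlock()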


class Mirror(object):

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  cachepath_lock = threading.Lock()

  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)
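
  # For example:
  #   parse_fetch_spec('main') returns
  #     ('+refs/heads/main:refs/heads/main', r'\+refs/heads/main:.*')
  #   parse_fetch_spec('refs/tags/*:refs/tags/*') returns
  #     ('+refs/tags/*:refs/tags/*', r'\+refs/tags/\*:.*')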

  def __init__(self, url, refs=None, print_func=None):
    self.url = url
    self.fetch_specs = set([self.parse_fetch_spec(ref) for ref in (refs or [])])
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
    if b:
      return b
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    # TODO(tandrii): delete once LUCI migration is completed.
    # Only public hosts will be supported going forward.
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @property
  def _gs_path(self):
    return 'gs://%s/v2/%s' % (self.bootstrap_bucket, self.basedir)

  @classmethod
  def FromPath(cls, path):
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    if os.path.isdir(url):
      # Ignore the drive letter in Windows
      url = os.path.splitdrive(url)[1]
      return url.replace('-', '--').replace(os.sep, '-')
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # Use the same dir for authenticated URLs and unauthenticated URLs.
    norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')
    return norm_url.replace('-', '--').replace('/', '-').lower()
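
  # For example (dashes are doubled so the '-' path separator stays
  # unambiguous; the repo names are hypothetical):
  #   UrlToCacheDir('https://chromium.googlesource.com/chromium/src.git')
  #     returns 'chromium.googlesource.com-chromium-src'
  #   UrlToCacheDir('https://chrome-internal.googlesource.com/a/foo')
  #     returns 'chrome--internal.googlesource.com-foo'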

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath']).decode('utf-8', 'ignore').strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        setattr(cls, 'cachepath', cachepath)

      ret = getattr(cls, 'cachepath')
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  @staticmethod
  def _GetMostRecentCacheDirectory(ls_out_set):
    ready_file_pattern = re.compile(r'.*/(\d+).ready$')
    ready_dirs = []

    for name in ls_out_set:
      m = ready_file_pattern.match(name)
      # Given <path>/<number>.ready,
      # we are interested in <path>/<number> directory
      if m and (name[:-len('.ready')] + '/') in ls_out_set:
        ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))

    if not ready_dirs:
      return None

    return max(ready_dirs)[1]
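
  # For example, given an ls_out_set of (bucket and repo names hypothetical):
  #   {'gs://bucket/v2/repo/100/', 'gs://bucket/v2/repo/100.ready',
  #    'gs://bucket/v2/repo/99/', 'gs://bucket/v2/repo/99.ready'}
  # this returns 'gs://bucket/v2/repo/100', the newest directory that has a
  # matching .ready marker.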

  def Rename(self, src, dst):
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess."""
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon. Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file. It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More aptly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().
    """
    if not self.bootstrap_bucket:
      return False

    gsutil = Gsutil(self.gsutil_exe, boto_path=None)

    # Get the most recent version of the directory.
    # This is determined from the most recent version of a .ready file.
    # The .ready file is only uploaded when an entire directory has been
    # uploaded to GS.
    _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
    ls_out_set = set(ls_out.strip().splitlines())
    latest_dir = self._GetMostRecentCacheDirectory(ls_out_set)

    if not latest_dir:
      self.print('No bootstrap file for %s found in %s, stderr:\n  %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  '  '.join((ls_err or '').splitlines(True))))
      return False

    try:
      # create new temporary directory locally
      tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
      self.RunGit(['init', '--bare'], cwd=tempdir)
      self.print('Downloading files in %s/* into %s.' %
                 (latest_dir, tempdir))
      with self.print_duration_of('download'):
        code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
                           tempdir)
      if code:
        return False
    except Exception as e:
      self.print('Encountered error: %s' % str(e), file=sys.stderr)
      gclient_utils.rmtree(tempdir)
      return False
    # delete the old directory
    if os.path.exists(directory):
      gclient_utils.rmtree(directory)
    self.Rename(tempdir, directory)
    return True

  def contains_revision(self, revision):
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False
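
  # For instance, contains_revision('deadbeef') (a hypothetical hash) runs
  # `git cat-file -e deadbeef^{commit}` on non-Windows platforms; the
  # ^{commit} suffix makes git verify that the object both exists and peels
  # to a commit.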

  def exists(self):
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path).decode('utf-8', 'ignore')
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warn('Tried and failed to preserve remote.origin.fetch from the '
                   'existing cache directory. You may need to manually edit '
                   '%s and "git cache fetch" again.'
                   % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(
      self, depth, bootstrap, reset_fetch_config, force=False):
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []
    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d or ==0' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT or
                        len(pack_files) == 0)

    if not should_bootstrap:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warn(
            'Shallow fetch requested, but repo cache already exists.')
      return

    if not self.exists():
      if os.path.exists(self.mirror_path):
        # If the mirror path exists but self.exists() returns false, we're
        # in an unexpected state. Nuke the previous mirror directory and
        # start fresh.
        gclient_utils.rmtree(self.mirror_path)
      os.mkdir(self.mirror_path)
    elif not reset_fetch_config:
      # Re-bootstrapping an existing mirror; preserve existing fetch spec.
      self._preserve_fetchspec()

    bootstrapped = (not depth and bootstrap and
                    self.bootstrap_repo(self.mirror_path))

    if not bootstrapped:
      if not self.exists() or not self.supported_project():
        # Bootstrap failed due to:
        # 1. No previous cache.
        # 2. Project doesn't have a bootstrap folder.
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=self.mirror_path)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warn(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))

  def _fetch(self,
             rundir,
             verbose,
             depth,
             no_fetch_tags,
             reset_fetch_config,
             prune=True):
    self.config(rundir, reset_fetch_config)

    fetch_cmd = ['fetch']
    if verbose:
      fetch_cmd.extend(['-v', '--progress'])
    if depth:
      fetch_cmd.extend(['--depth', str(depth)])
    if no_fetch_tags:
      fetch_cmd.append('--no-tags')
    if prune:
      fetch_cmd.append('--prune')
    fetch_cmd.append('origin')

    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).decode('utf-8', 'ignore').strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warn('Fetch of %s failed' % spec)

  def populate(self,
               depth=None,
               no_fetch_tags=False,
               shallow=False,
               bootstrap=False,
               verbose=False,
               ignore_lock=False,
               lock_timeout=0,
               reset_fetch_config=False):
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    try:
      self._ensure_bootstrapped(depth, bootstrap, reset_fetch_config)
      self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
                  reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(self.mirror_path)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      self._ensure_bootstrapped(
          depth, bootstrap, reset_fetch_config, force=True)
      self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
                  reset_fetch_config)
    finally:
      if not ignore_lock:
        lockfile.unlock()
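
  # A minimal populate sketch (the cache path is hypothetical):
  #
  #   Mirror.SetCachePath('/tmp/git-cache')
  #   mirror = Mirror('https://chromium.googlesource.com/chromium/src.git')
  #   mirror.populate(bootstrap=True, lock_timeout=30)
  #
  # A ClobberNeeded failure wipes the mirror, forces a re-bootstrap, and
  # retries the fetch once.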

  def update_bootstrap(self, prune=False, gc_aggressive=False):
    # The folder is <git number>
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'],
        cwd=self.mirror_path).decode('utf-8', 'ignore').strip()
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)

    src_name = self.mirror_path
    dest_prefix = '%s/%s' % (self._gs_path, gen_number)

    # ls_out lists contents in the format: gs://blah/blah/123...
    _, ls_out, _ = gsutil.check_call('ls', self._gs_path)

    # Check to see if folder already exists in gs
    ls_out_set = set(ls_out.strip().splitlines())
    if (dest_prefix + '/' in ls_out_set and
        dest_prefix + '.ready' in ls_out_set):
      print('Cache %s already exists.' % dest_prefix)
      return

    # Run Garbage Collect to compress packfile.
    gc_args = ['gc', '--prune=all']
    if gc_aggressive:
      gc_args.append('--aggressive')
    self.RunGit(gc_args)

    gsutil.call('-m', 'cp', '-r', src_name, dest_prefix)

    # Create .ready file and upload
    _, ready_file_name = tempfile.mkstemp(suffix='.ready')
    try:
      gsutil.call('cp', ready_file_name, '%s.ready' % (dest_prefix))
    finally:
      os.remove(ready_file_name)

    # remove all other directory/.ready files in the same gs_path
    # except for the directory/.ready file previously created
    # which can be used for bootstrapping while the current one is
    # being uploaded
    if not prune:
      return
    prev_dest_prefix = self._GetMostRecentCacheDirectory(ls_out_set)
    if not prev_dest_prefix:
      return
    for path in ls_out_set:
      if (path == prev_dest_prefix + '/' or
          path == prev_dest_prefix + '.ready'):
        continue
      if path.endswith('.ready'):
        gsutil.call('rm', path)
        continue
      gsutil.call('-m', 'rm', '-r', path)

  @staticmethod
  def DeleteTmpPackFiles(path):
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warn('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warn('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True

    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True

    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    cachepath = cls.GetCachePath()
    if not cachepath:
      return
    dirlist = os.listdir(cachepath)
    repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
                     if os.path.isdir(os.path.join(cachepath, path))])
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos


@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, args = parser.parse_args(args)
  if not len(args) == 1:
    parser.error('git cache exists only takes exactly one repo url.')
  url = args[0]
  mirror = Mirror(url)
  if mirror.exists():
    print(mirror.mirror_path)
    return 0
  return 1


@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Creates and uploads a bootstrap tarball."""
  # Let's just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--skip-populate', action='store_true',
                    help='Skips "populate" step if mirror already exists.')
  parser.add_option('--gc-aggressive', action='store_true',
                    help='Run aggressive repacking of the repo.')
  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached bundles of the same repo.')

  populate_args = args[:]
  options, args = parser.parse_args(args)
  url = args[0]
  mirror = Mirror(url)
  if not options.skip_populate or not mirror.exists():
    CMDpopulate(parser, populate_args)
  else:
    print('Skipped populate step.')

  # Get the repo directory.
  _, args2 = parser.parse_args(args)
  url = args2[0]
  mirror = Mirror(url)
  mirror.update_bootstrap(options.prune, options.gc_aggressive)
  return 0


@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option(
      '--no-fetch-tags',
      action='store_true',
      help=('Don\'t fetch tags from the server. This can speed up '
            'fetch considerably when there are many tags.'))
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t bootstrap from Google Storage')
  parser.add_option('--ignore_locks', '--ignore-locks',
                    action='store_true',
                    help='Don\'t try to lock repository')
  parser.add_option('--break-locks',
                    action='store_true',
                    help='Break any existing lock instead of just ignoring it')
  parser.add_option('--reset-fetch-config', action='store_true', default=False,
                    help='Reset the fetch config before populating the cache.')

  options, args = parser.parse_args(args)
  if not len(args) == 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  mirror = Mirror(url, refs=options.ref)
  if options.break_locks:
    mirror.unlock()
  kwargs = {
      'no_fetch_tags': options.no_fetch_tags,
      'verbose': options.verbose,
      'shallow': options.shallow,
      'bootstrap': not options.no_bootstrap,
      'ignore_lock': options.ignore_locks,
      'lock_timeout': options.timeout,
      'reset_fetch_config': options.reset_fetch_config,
  }
  if options.depth:
    kwargs['depth'] = options.depth
  mirror.populate(**kwargs)


@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  parser.add_option(
      '--no-fetch-tags',
      action='store_true',
      help=('Don\'t fetch tags from the server. This can speed up '
            'fetch considerably when there are many tags.'))
  options, args = parser.parse_args(args)

  # Figure out which remotes to fetch. This mimics the behavior of regular
  # 'git fetch'. Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    assert not args, 'fatal: fetch --all does not take a repository argument'
    remotes = subprocess.check_output([Mirror.git_exe, 'remote'])
    remotes = remotes.decode('utf-8', 'ignore').splitlines()
  elif args:
    remotes = args
  else:
    current_branch = subprocess.check_output(
        [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD'])
    current_branch = current_branch.decode('utf-8', 'ignore').strip()
    if current_branch != 'HEAD':
      upstream = subprocess.check_output(
          [Mirror.git_exe, 'config', 'branch.%s.remote' % current_branch])
      upstream = upstream.decode('utf-8', 'ignore').strip()
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  git_dir = subprocess.check_output(
      [Mirror.git_exe, 'rev-parse', '--git-dir']).decode('utf-8', 'ignore')
  git_dir = os.path.abspath(git_dir.strip())
  if git_dir.startswith(cachepath):
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap,
        no_fetch_tags=options.no_fetch_tags,
        lock_timeout=options.timeout)
    return 0
  for remote in remotes:
    remote_url = subprocess.check_output(
        [Mirror.git_exe, 'config', 'remote.%s.url' % remote])
    remote_url = remote_url.decode('utf-8', 'ignore').strip()
    if remote_url.startswith(cachepath):
      mirror = Mirror.FromPath(remote_url)
      mirror.print = lambda *args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap,
          no_fetch_tags=options.no_fetch_tags,
          lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0


@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    lockfiles = [os.path.join(cachepath, path)
                 for path in os.listdir(cachepath)
                 if path.endswith('.lock') and
                 os.path.isfile(os.path.join(cachepath, path))]
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n  %s' % '\n  '.join(
        unlocked_repos))


class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                      'Path to the directory containing the caches. Normally '
                      'deduced from git config cache.cachepath or '
                      '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        logging.warn('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args


def main(argv):
  dispatcher = subcommand.CommandDispatcher(__name__)
  return dispatcher.execute(OptionParser(), argv)


if __name__ == '__main__':
  try:
    sys.exit(main(sys.argv[1:]))
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    sys.exit(1)