#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A git command for managing a local cache of git repositories."""

from __future__ import print_function

import contextlib
import errno
import logging
import optparse
import os
import re
import subprocess
import sys
import tempfile
import threading
import time

try:
  import urlparse
except ImportError:  # For Py3 compatibility
  import urllib.parse as urlparse

from download_from_google_storage import Gsutil
import gclient_utils
import subcommand

# Analogous to gc.autopacklimit git config.
GC_AUTOPACKLIMIT = 50

GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    pass


class LockError(Exception):
  pass


class ClobberNeeded(Exception):
  pass


def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string
        upon failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.
  """
  printerr = printerr or logging.warning
  for i in range(count):
    try:
      return fn()
    except excs as e:
      if (i + 1) >= count:
        raise
      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i + 1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
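

# Usage sketch (illustrative): wrap a flaky operation in a retry loop; here
# `src` and `dst` are hypothetical paths, mirroring Mirror.Rename() below.
#
#   exponential_backoff_retry(
#       lambda: os.rename(src, dst),
#       excs=(OSError,),
#       name='rename [%s] => [%s]' % (src, dst))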


class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile."""

  def __init__(self, path, timeout=0):
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid."""
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    f = os.fdopen(fd, 'w')
    print(self.pid, file=f)
    f.close()

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          # Sleep between 3 and 10 seconds, bounded by the remaining timeout.
          sleep_time = max(3, min(10, self.timeout - elapsed))
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock."""
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else."""
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
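

# Typical Lockfile usage (illustrative sketch, mirroring Mirror.populate()
# below): acquire, mutate the cache, then release in a finally block.
#
#   lockfile = Lockfile(mirror_path, timeout=lock_timeout)
#   lockfile.lock()
#   try:
#     ...  # update the cache directory
#   finally:
#     lockfile.unlock()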


class Mirror(object):

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  cachepath_lock = threading.Lock()

  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)
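
  # Example (illustrative): a bare branch name is expanded into a full
  # refspec, plus a value regex for 'git config --replace-all':
  #
  #   Mirror.parse_fetch_spec('main')
  #   # -> ('+refs/heads/main:refs/heads/main', r'\+refs/heads/main:.*')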

  def __init__(self, url, refs=None, print_func=None):
    self.url = url
    self.fetch_specs = set([self.parse_fetch_spec(ref) for ref in (refs or [])])
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
    if b:
      return b
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    # TODO(tandrii): delete once LUCI migration is completed.
    # Only public hosts will be supported going forward.
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @property
  def _gs_path(self):
    return 'gs://%s/v2/%s' % (self.bootstrap_bucket, self.basedir)

  @classmethod
  def FromPath(cls, path):
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    # Get rid of the port. This is only needed for Windows tests, since tests
    # serve git from git://localhost:port/git, but Windows doesn't like ':' in
    # paths.
    netloc = parsed.netloc
    if ':' in netloc:
      netloc = netloc.split(':', 1)[0]
    norm_url = netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # Use the same dir for authenticated URLs and unauthenticated URLs.
    norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath
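
  # Round-trip example (illustrative):
  #
  #   Mirror.UrlToCacheDir(
  #       'https://chromium.googlesource.com/chromium/src.git')
  #   # -> 'chromium.googlesource.com-chromium-src'
  #   Mirror.CacheDirToUrl('chromium.googlesource.com-chromium-src')
  #   # -> 'https://chromium.googlesource.com/chromium/src'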

  @classmethod
  def SetCachePath(cls, cachepath):
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath']).strip()
        except subprocess.CalledProcessError:
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        setattr(cls, 'cachepath', cachepath)

      ret = getattr(cls, 'cachepath')
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  @staticmethod
  def _GetMostRecentCacheDirectory(ls_out_set):
    ready_file_pattern = re.compile(r'.*/(\d+).ready$')
    ready_dirs = []

    for name in ls_out_set:
      m = ready_file_pattern.match(name)
      # Given <path>/<number>.ready,
      # we are interested in <path>/<number> directory
      if m and (name[:-len('.ready')] + '/') in ls_out_set:
        ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))

    if not ready_dirs:
      return None

    return max(ready_dirs)[1]
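
  # Example (illustrative, with a hypothetical bucket and generation numbers):
  # a directory only counts if both '<dir>/' and '<dir>.ready' are listed,
  # and the highest generation number wins.
  #
  #   Mirror._GetMostRecentCacheDirectory({
  #       'gs://bucket/v2/repo/100/', 'gs://bucket/v2/repo/100.ready',
  #       'gs://bucket/v2/repo/123/', 'gs://bucket/v2/repo/123.ready'})
  #   # -> 'gs://bucket/v2/repo/123'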

  def Rename(self, src, dst):
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess."""
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon. Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file. It's really slow for
    # large repositories, and there's no way to track progress and make sure
    # it's not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for caching delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More aptly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().
    """
    if not self.bootstrap_bucket:
      return False

    gsutil = Gsutil(self.gsutil_exe, boto_path=None)

    # Get the most recent version of the directory.
    # This is determined from the most recent version of a .ready file.
    # The .ready file is only uploaded when an entire directory has been
    # uploaded to GS.
    _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
    ls_out_set = set(ls_out.strip().splitlines())
    latest_dir = self._GetMostRecentCacheDirectory(ls_out_set)

    if not latest_dir:
      self.print('No bootstrap file for %s found in %s, stderr:\n  %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  '  '.join((ls_err or '').splitlines(True))))
      return False

    tempdir = None
    try:
      # Create a new temporary directory locally.
      tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
      self.RunGit(['init', '--bare'], cwd=tempdir)
      self.print('Downloading files in %s/* into %s.' %
                 (latest_dir, tempdir))
      with self.print_duration_of('download'):
        code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
                           tempdir)
      if code:
        return False
    except Exception as e:
      self.print('Encountered error: %s' % str(e), file=sys.stderr)
      # Guard against mkdtemp itself having failed, leaving tempdir unset.
      if tempdir:
        gclient_utils.rmtree(tempdir)
      return False
    # Delete the old directory.
    if os.path.exists(directory):
      gclient_utils.rmtree(directory)
    self.Rename(tempdir, directory)
    return True

  def contains_revision(self, revision):
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, i.e. the git object of the given
      # hash was found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path)
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warn('Tried and failed to preserve remote.origin.fetch from the '
                   'existing cache directory. You may need to manually edit '
                   '%s and "git cache fetch" again.'
                   % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []
    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)

    if not should_bootstrap:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warn(
            'Shallow fetch requested, but repo cache already exists.')
      return

    if self.exists():
      # Re-bootstrapping an existing mirror; preserve existing fetch spec.
      self._preserve_fetchspec()
    else:
      if os.path.exists(self.mirror_path):
        # If the mirror path exists but self.exists() returns false, we're
        # in an unexpected state. Nuke the previous mirror directory and
        # start fresh.
        gclient_utils.rmtree(self.mirror_path)
      os.mkdir(self.mirror_path)

    bootstrapped = (not depth and bootstrap and
                    self.bootstrap_repo(self.mirror_path))

    if not bootstrapped:
      if not self.exists() or not self.supported_project():
        # Bootstrap failed due to:
        # 1. No previous cache.
        # 2. Project doesn't have a bootstrap folder.
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=self.mirror_path)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warn(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))

  def _fetch(self, rundir, verbose, depth, no_fetch_tags, reset_fetch_config):
    self.config(rundir, reset_fetch_config)
    v = []
    d = []
    t = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    if no_fetch_tags:
      t = ['--no-tags']
    fetch_cmd = ['fetch'] + v + d + t + ['origin']
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).strip().splitlines()
    for spec in fetch_specs:
      spec = spec.decode()
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warn('Fetch of %s failed' % spec)

  def populate(self,
               depth=None,
               no_fetch_tags=False,
               shallow=False,
               bootstrap=False,
               verbose=False,
               ignore_lock=False,
               lock_timeout=0,
               reset_fetch_config=False):
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    try:
      self._ensure_bootstrapped(depth, bootstrap)
      self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
                  reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(self.mirror_path)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      self._ensure_bootstrapped(depth, bootstrap, force=True)
      self._fetch(self.mirror_path, verbose, depth, no_fetch_tags,
                  reset_fetch_config)
    finally:
      if not ignore_lock:
        lockfile.unlock()
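
  # populate() usage sketch (illustrative; mirrors CMDpopulate below). With
  # shallow=True and no explicit depth, the fetch depth defaults to 10000
  # commits; the timeout value here is hypothetical.
  #
  #   mirror = Mirror('https://chromium.googlesource.com/chromium/src.git')
  #   mirror.populate(shallow=True, bootstrap=True, lock_timeout=20)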

  def update_bootstrap(self, prune=False, gc_aggressive=False):
    # The folder is <git number>
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)

    src_name = self.mirror_path
    dest_prefix = '%s/%s' % (self._gs_path, gen_number)

    # ls_out lists contents in the format: gs://blah/blah/123...
    _, ls_out, _ = gsutil.check_call('ls', self._gs_path)

    # Check to see if folder already exists in gs
    ls_out_set = set(ls_out.strip().splitlines())
    if (dest_prefix + '/' in ls_out_set and
        dest_prefix + '.ready' in ls_out_set):
      print('Cache %s already exists.' % dest_prefix)
      return

    # Run Garbage Collect to compress packfile.
    gc_args = ['gc', '--prune=all']
    if gc_aggressive:
      gc_args.append('--aggressive')
    self.RunGit(gc_args)

    gsutil.call('-m', 'cp', '-r', src_name, dest_prefix)

    # Create .ready file and upload
    _, ready_file_name = tempfile.mkstemp(suffix='.ready')
    try:
      gsutil.call('cp', ready_file_name, '%s.ready' % (dest_prefix))
    finally:
      os.remove(ready_file_name)

    # Remove all other directory/.ready files in the same gs_path, except for
    # the directory/.ready file previously created, which can be used for
    # bootstrapping while the current one is being uploaded.
    if not prune:
      return
    prev_dest_prefix = self._GetMostRecentCacheDirectory(ls_out_set)
    if not prev_dest_prefix:
      return
    for path in ls_out_set:
      if (path == prev_dest_prefix + '/' or
          path == prev_dest_prefix + '.ready'):
        continue
      if path.endswith('.ready'):
        gsutil.call('rm', path)
        continue
      gsutil.call('-m', 'rm', '-r', path)

  @staticmethod
  def DeleteTmpPackFiles(path):
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warn('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warn('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True

    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True

    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    cachepath = cls.GetCachePath()
    if not cachepath:
      return
    dirlist = os.listdir(cachepath)
    repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
                     if os.path.isdir(os.path.join(cachepath, path))])
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos


@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, args = parser.parse_args(args)
  if not len(args) == 1:
    parser.error('git cache exists only takes exactly one repo url.')
  url = args[0]
  mirror = Mirror(url)
  if mirror.exists():
    print(mirror.mirror_path)
    return 0
  return 1
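

# Example invocation (illustrative): prints the mirror path and returns 0 if
# a cache of the repo exists, returns 1 otherwise.
#
#   git cache exists https://chromium.googlesource.com/chromium/src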


@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Creates and uploads a bootstrap tarball."""
  # Let's just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--skip-populate', action='store_true',
                    help='Skips "populate" step if mirror already exists.')
  parser.add_option('--gc-aggressive', action='store_true',
                    help='Run aggressive repacking of the repo.')
  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached bundles of the same repo.')

  populate_args = args[:]
  options, args = parser.parse_args(args)
  url = args[0]
  mirror = Mirror(url)
  if not options.skip_populate or not mirror.exists():
    CMDpopulate(parser, populate_args)
  else:
    print('Skipped populate step.')

  mirror.update_bootstrap(options.prune, options.gc_aggressive)
  return 0


@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option(
      '--no-fetch-tags',
      action='store_true',
      help=('Don\'t fetch tags from the server. This can speed up '
            'fetch considerably when there are many tags.'))
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t bootstrap from Google Storage')
  parser.add_option('--ignore_locks', '--ignore-locks',
                    action='store_true',
                    help='Don\'t try to lock repository')
  parser.add_option('--break-locks',
                    action='store_true',
                    help='Break any existing lock instead of just ignoring it')
  parser.add_option('--reset-fetch-config', action='store_true', default=False,
                    help='Reset the fetch config before populating the cache.')

  options, args = parser.parse_args(args)
  if not len(args) == 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  mirror = Mirror(url, refs=options.ref)
  if options.break_locks:
    mirror.unlock()
  kwargs = {
      'no_fetch_tags': options.no_fetch_tags,
      'verbose': options.verbose,
      'shallow': options.shallow,
      'bootstrap': not options.no_bootstrap,
      'ignore_lock': options.ignore_locks,
      'lock_timeout': options.timeout,
      'reset_fetch_config': options.reset_fetch_config,
  }
  if options.depth:
    kwargs['depth'] = options.depth
  mirror.populate(**kwargs)
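

# Example invocation (illustrative): cache up to 10000 commits of history,
# breaking any stale lock first.
#
#   git cache populate --shallow --break-locks \
#       https://chromium.googlesource.com/chromium/src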


@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  parser.add_option(
      '--no-fetch-tags',
      action='store_true',
      help=('Don\'t fetch tags from the server. This can speed up '
            'fetch considerably when there are many tags.'))
  options, args = parser.parse_args(args)

  # Figure out which remotes to fetch. This mimics the behavior of regular
  # 'git fetch'. Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    assert not args, 'fatal: fetch --all does not take a repository argument'
    remotes = subprocess.check_output([Mirror.git_exe, 'remote']).splitlines()
  elif args:
    remotes = args
  else:
    current_branch = subprocess.check_output(
        [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
    if current_branch != 'HEAD':
      upstream = subprocess.check_output(
          [Mirror.git_exe, 'config', 'branch.%s.remote' % current_branch]
      ).strip()
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  git_dir = os.path.abspath(subprocess.check_output(
      [Mirror.git_exe, 'rev-parse', '--git-dir']).strip())
  if git_dir.startswith(cachepath):
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap,
        no_fetch_tags=options.no_fetch_tags,
        lock_timeout=options.timeout)
    return 0
  for remote in remotes:
    remote_url = subprocess.check_output(
        [Mirror.git_exe, 'config', 'remote.%s.url' % remote]).strip()
    if remote_url.startswith(cachepath):
      mirror = Mirror.FromPath(remote_url)
      mirror.print = lambda *args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap,
          no_fetch_tags=options.no_fetch_tags,
          lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0


@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    lockfiles = [os.path.join(cachepath, path)
                 for path in os.listdir(cachepath)
                 if path.endswith('.lock') and
                 os.path.isfile(os.path.join(cachepath, path))]
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: %s'
                 % ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n  %s' % '\n  '.join(
        unlocked_repos))


class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help=(
                        'Path to the directory containing the caches. '
                        'Normally deduced from git config cache.cachepath or '
                        '$GIT_CACHE_PATH.'))
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        logging.warn('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args


def main(argv):
  dispatcher = subcommand.CommandDispatcher(__name__)
  return dispatcher.execute(OptionParser(), argv)


if __name__ == '__main__':
  try:
    sys.exit(main(sys.argv[1:]))
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    sys.exit(1)