git_cache.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943
  1. #!/usr/bin/env python3
  2. # Copyright 2014 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """A git command for managing a local cache of git repositories."""
  6. import contextlib
  7. import logging
  8. import optparse
  9. import os
  10. import re
  11. import subprocess
  12. import sys
  13. import tempfile
  14. import threading
  15. import time
  16. import urllib.parse
  17. from download_from_google_storage import Gsutil
  18. import gclient_utils
  19. import lockfile
  20. import metrics
  21. import subcommand
# Analogous to gc.autopacklimit git config.
GC_AUTOPACKLIMIT = 50

GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

# Marker file created inside a mirror before its first fetch and removed once
# that fetch succeeds; its presence means a previous init never finished and
# the mirror must be wiped.  ("sentient" is a historic misspelling of
# "sentinel"; the on-disk name is kept for compatibility.)
INIT_SENTIENT_FILE = ".mirror_init"

# gsutil creates many processes and threads. Creating too many gsutil cp
# processes may result in running out of resources, and may perform worse due
# to context switching. This limits how many concurrent gsutil cp processes
# git_cache runs.
GSUTIL_CP_SEMAPHORE = threading.Semaphore(2)
try:
    # pylint: disable=undefined-variable
    # On Windows, expose the built-in WindowsError under a portable name.
    WinErr = WindowsError
except NameError:
    # Non-Windows platforms have no WindowsError; provide a stand-in class so
    # 'except WinErr' clauses stay valid everywhere.
    class WinErr(Exception):
        pass
class ClobberNeeded(Exception):
    """Raised when a mirror is unusable and must be wiped and re-bootstrapped."""
    pass
class Mirror(object):
    """A single mirrored git repository inside the local cache directory."""

    # Windows needs the .bat shim so git resolves correctly from a shell.
    git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
    gsutil_exe = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              'gsutil.py')
    # Guards lazy initialization of the class-level 'cachepath' attribute.
    cachepath_lock = threading.Lock()
    # Sentinel meaning "no cache path configured anywhere" (see GetCachePath).
    UNSET_CACHEPATH = object()

    # Used for tests
    _GIT_CONFIG_LOCATION = []
  47. @staticmethod
  48. def parse_fetch_spec(spec):
  49. """Parses and canonicalizes a fetch spec.
  50. Returns (fetchspec, value_regex), where value_regex can be used
  51. with 'git config --replace-all'.
  52. """
  53. parts = spec.split(':', 1)
  54. src = parts[0].lstrip('+').rstrip('/')
  55. if not src.startswith('refs/'):
  56. src = 'refs/heads/%s' % src
  57. dest = parts[1].rstrip('/') if len(parts) > 1 else src
  58. regex = r'\+%s:.*' % src.replace('*', r'\*')
  59. return ('+%s:%s' % (src, dest), regex)
  60. def __init__(self, url, refs=None, commits=None, print_func=None):
  61. self.url = url
  62. self.fetch_specs = {self.parse_fetch_spec(ref) for ref in (refs or [])}
  63. self.fetch_commits = set(commits or [])
  64. self.basedir = self.UrlToCacheDir(url)
  65. self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
  66. if print_func:
  67. self.print = self.print_without_file
  68. self.print_func = print_func
  69. else:
  70. self.print = print
    def print_without_file(self, message, **_kwargs):
        """print() replacement forwarding to the injected print_func.

        Silently drops print-only keyword arguments such as file=.
        """
        self.print_func(message)
    @contextlib.contextmanager
    def print_duration_of(self, what):
        """Context manager that logs how long its body took, in minutes."""
        start = time.time()
        try:
            yield
        finally:
            # Report the elapsed time even when the body raises.
            self.print('%s took %.1f minutes' % (what,
                                                 (time.time() - start) / 60.0))
    @property
    def _init_sentient_file(self):
        """Path of the marker flagging an unfinished mirror initialization."""
        return os.path.join(self.mirror_path, INIT_SENTIENT_FILE)
  84. @property
  85. def bootstrap_bucket(self):
  86. b = os.getenv('OVERRIDE_BOOTSTRAP_BUCKET')
  87. if b:
  88. return b
  89. u = urllib.parse.urlparse(self.url)
  90. if u.netloc == 'chromium.googlesource.com':
  91. return 'chromium-git-cache'
  92. # Not recognized.
  93. return None
  94. @property
  95. def _gs_path(self):
  96. return 'gs://%s/v2/%s' % (self.bootstrap_bucket, self.basedir)
    @classmethod
    def FromPath(cls, path):
        """Build a Mirror for the repo whose cache directory is |path|."""
        return cls(cls.CacheDirToUrl(path))
  100. @staticmethod
  101. def UrlToCacheDir(url):
  102. """Convert a git url to a normalized form for the cache dir path."""
  103. if os.path.isdir(url):
  104. # Ignore the drive letter in Windows
  105. url = os.path.splitdrive(url)[1]
  106. return url.replace('-', '--').replace(os.sep, '-')
  107. parsed = urllib.parse.urlparse(url)
  108. norm_url = parsed.netloc + parsed.path
  109. if norm_url.endswith('.git'):
  110. norm_url = norm_url[:-len('.git')]
  111. # Use the same dir for authenticated URLs and unauthenticated URLs.
  112. norm_url = norm_url.replace('googlesource.com/a/', 'googlesource.com/')
  113. norm_url = norm_url.replace(':', '__')
  114. return norm_url.replace('-', '--').replace('/', '-').lower()
  115. @staticmethod
  116. def CacheDirToUrl(path):
  117. """Convert a cache dir path to its corresponding url."""
  118. netpath = re.sub(r'\b-\b', '/',
  119. os.path.basename(path)).replace('--', '-')
  120. netpath = netpath.replace('__', ':')
  121. if netpath.startswith('git@'):
  122. return netpath
  123. return 'https://%s' % netpath
  124. @classmethod
  125. def SetCachePath(cls, cachepath):
  126. with cls.cachepath_lock:
  127. setattr(cls, 'cachepath', cachepath)
    @classmethod
    def GetCachePath(cls):
        """Returns the cache root, resolving and memoizing it on first use.

        Resolution order: the 'cache.cachepath' git config value, then the
        $GIT_CACHE_PATH environment variable.

        Raises:
            RuntimeError: when neither source provides a path.
        """
        with cls.cachepath_lock:
            if not hasattr(cls, 'cachepath'):
                try:
                    cachepath = subprocess.check_output(
                        [cls.git_exe, 'config'] + cls._GIT_CONFIG_LOCATION +
                        ['--type', 'path', 'cache.cachepath']).decode(
                            'utf-8', 'ignore').strip()
                except subprocess.CalledProcessError:
                    # git config lookup failed; fall back to the environment,
                    # or to the "unset" sentinel when that is missing too.
                    cachepath = os.environ.get('GIT_CACHE_PATH',
                                               cls.UNSET_CACHEPATH)
                setattr(cls, 'cachepath', cachepath)

            ret = getattr(cls, 'cachepath')
            if ret is cls.UNSET_CACHEPATH:
                raise RuntimeError('No cache.cachepath git configuration or '
                                   '$GIT_CACHE_PATH is set.')
            return ret
  146. @staticmethod
  147. def _GetMostRecentCacheDirectory(ls_out_set):
  148. ready_file_pattern = re.compile(r'.*/(\d+).ready$')
  149. ready_dirs = []
  150. for name in ls_out_set:
  151. m = ready_file_pattern.match(name)
  152. # Given <path>/<number>.ready,
  153. # we are interested in <path>/<number> directory
  154. if m and (name[:-len('.ready')] + '/') in ls_out_set:
  155. ready_dirs.append((int(m.group(1)), name[:-len('.ready')]))
  156. if not ready_dirs:
  157. return None
  158. return max(ready_dirs)[1]
    def Rename(self, src, dst):
        """os.rename(src, dst) with retries; renames transiently fail on
        Windows when another process still holds a file open."""
        # This is somehow racy on Windows.
        # Catching OSError because WindowsError isn't portable and
        # pylint complains.
        gclient_utils.exponential_backoff_retry(lambda: os.rename(src, dst),
                                                excs=(OSError, ),
                                                name='rename [%s] => [%s]' %
                                                (src, dst),
                                                printerr=self.print)
    def RunGit(self, cmd, print_stdout=True, **kwargs):
        """Run git in a subprocess.

        cmd: git arguments (without the leading 'git').
        print_stdout: when True, stream git's output through self.print.
        kwargs: forwarded to gclient_utils.CheckCallAndFilter (cwd, env, ...).
        """
        cwd = kwargs.setdefault('cwd', self.mirror_path)
        # Point git at the mirror explicitly unless the caller already did.
        if "--git-dir" not in cmd:
            cmd = ['--git-dir', os.path.abspath(cwd)] + cmd
        kwargs.setdefault('print_stdout', False)
        if print_stdout:
            kwargs.setdefault('filter_fn', self.print)
        env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
        # 'true' as the askpass helper makes auth prompts fail immediately
        # instead of hanging waiting for input.
        env.setdefault('GIT_ASKPASS', 'true')
        env.setdefault('SSH_ASKPASS', 'true')
        self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
        return gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)
    def config(self, reset_fetch_config=False):
        """Write this mirror's git configuration: remote url, fetch specs and
        gc/performance tuning.

        reset_fetch_config: drop all existing remote.origin.fetch entries
            before writing the new ones.

        Raises:
            ClobberNeeded: when even 'git config' fails, i.e. the repo is
                beyond repair.
        """
        if reset_fetch_config:
            try:
                self.RunGit(['config', '--unset-all', 'remote.origin.fetch'])
            except subprocess.CalledProcessError as e:
                # If exit code was 5, it means we attempted to unset a config
                # that didn't exist. Ignore it.
                if e.returncode != 5:
                    raise

        # Don't run git-gc in a daemon. Bad things can happen if it gets
        # killed.
        try:
            self.RunGit(['config', 'gc.autodetach', '0'])
        except subprocess.CalledProcessError:
            # Hard error, need to clobber.
            raise ClobberNeeded()

        # Don't combine pack files into one big pack file. It's really slow for
        # repositories, and there's no way to track progress and make sure it's
        # not stuck.
        if self.supported_project():
            self.RunGit(['config', 'gc.autopacklimit', '0'])

        # Allocate more RAM for cache-ing delta chains, for better performance
        # of "Resolving deltas".
        self.RunGit([
            'config', 'core.deltaBaseCacheLimit',
            gclient_utils.DefaultDeltaBaseCacheLimit()
        ])

        self.RunGit(['config', 'remote.origin.url', self.url])
        # Always mirror all heads; per-instance extra specs are added below.
        self.RunGit([
            'config', '--replace-all', 'remote.origin.fetch',
            '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'
        ])
        for spec, value_regex in self.fetch_specs:
            self.RunGit([
                'config', '--replace-all', 'remote.origin.fetch', spec,
                value_regex
            ])
  218. def bootstrap_repo(self, directory):
  219. """Bootstrap the repo from Google Storage if possible.
  220. More apt-ly named
  221. bootstrap_repo_from_cloud_if_possible_else_do_nothing().
  222. """
  223. if not self.bootstrap_bucket:
  224. return False
  225. gsutil = Gsutil(self.gsutil_exe, boto_path=None)
  226. # Get the most recent version of the directory.
  227. # This is determined from the most recent version of a .ready file.
  228. # The .ready file is only uploaded when an entire directory has been
  229. # uploaded to GS.
  230. _, ls_out, ls_err = gsutil.check_call('ls', self._gs_path)
  231. ls_out_set = set(ls_out.strip().splitlines())
  232. latest_dir = self._GetMostRecentCacheDirectory(ls_out_set)
  233. if not latest_dir:
  234. self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
  235. (self.mirror_path, self.bootstrap_bucket, ' '.join(
  236. (ls_err or '').splitlines(True))))
  237. return False
  238. try:
  239. # create new temporary directory locally
  240. tempdir = tempfile.mkdtemp(prefix='_cache_tmp',
  241. dir=self.GetCachePath())
  242. self.RunGit(['init', '-b', 'main', '--bare'], cwd=tempdir)
  243. self.print('Downloading files in %s/* into %s.' %
  244. (latest_dir, tempdir))
  245. with self.print_duration_of('download'):
  246. with GSUTIL_CP_SEMAPHORE:
  247. code = gsutil.call('-m', 'cp', '-r', latest_dir + "/*",
  248. tempdir)
  249. if code:
  250. return False
  251. # A quick validation that all references are valid.
  252. self.RunGit(['for-each-ref'], print_stdout=False, cwd=tempdir)
  253. except Exception as e:
  254. self.print('Encountered error: %s' % str(e), file=sys.stderr)
  255. gclient_utils.rmtree(tempdir)
  256. return False
  257. # delete the old directory
  258. if os.path.exists(directory):
  259. gclient_utils.rmtree(directory)
  260. self.Rename(tempdir, directory)
  261. return True
    def contains_revision(self, revision):
        """Returns True when |revision| resolves to a commit in this mirror."""
        if not self.exists():
            return False

        if sys.platform.startswith('win'):
            # Windows .bat scripts use ^ as escape sequence, which means we have
            # to escape it with itself for every .bat invocation.
            needle = '%s^^^^{commit}' % revision
        else:
            needle = '%s^{commit}' % revision
        try:
            # cat-file exits with 0 on success, that is git object of given hash
            # was found.
            self.RunGit(['cat-file', '-e', needle])
            return True
        except subprocess.CalledProcessError:
            self.print('Commit with hash "%s" not found' % revision,
                       file=sys.stderr)
            return False
  280. def exists(self):
  281. return os.path.isfile(os.path.join(self.mirror_path, 'config'))
  282. def supported_project(self):
  283. """Returns true if this repo is known to have a bootstrap zip file."""
  284. u = urllib.parse.urlparse(self.url)
  285. return u.netloc in [
  286. 'chromium.googlesource.com', 'chrome-internal.googlesource.com'
  287. ]
    def _preserve_fetchspec(self):
        """Read and preserve remote.origin.fetch from an existing mirror.

        This modifies self.fetch_specs.
        """
        if not self.exists():
            return
        try:
            config_fetchspecs = subprocess.check_output([
                self.git_exe, '--git-dir', self.mirror_path, 'config',
                '--get-all', 'remote.origin.fetch'
            ]).decode('utf-8', 'ignore')
            for fetchspec in config_fetchspecs.splitlines():
                self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
        except subprocess.CalledProcessError:
            # Best-effort: a failed read only means re-bootstrapping may lose
            # custom fetch specs, so warn rather than abort.
            logging.warning(
                'Tried and failed to preserve remote.origin.fetch from the '
                'existing cache directory. You may need to manually edit '
                '%s and "git cache fetch" again.' %
                os.path.join(self.mirror_path, 'config'))
    def _ensure_bootstrapped(self,
                             depth,
                             bootstrap,
                             reset_fetch_config,
                             force=False):
        """Make sure the mirror directory exists, re-seeding it from a Google
        Storage bootstrap when necessary.

        Re-bootstrapping happens when |force| is set, the mirror does not
        exist yet, or its number of .pack files is 0 or above
        GC_AUTOPACKLIMIT.  |depth| (shallow fetch) disables bootstrapping.
        """
        pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
        pack_files = []
        if os.path.isdir(pack_dir):
            pack_files = [
                f for f in os.listdir(pack_dir) if f.endswith('.pack')
            ]
            self.print('%s has %d .pack files, re-bootstrapping if >%d or ==0' %
                       (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

        # master->main branch migration left the cache in some builders to have
        # its HEAD still pointing to refs/heads/master. This causes bot_update
        # to fail. If in this state, delete the cache and force bootstrap.
        try:
            with open(os.path.join(self.mirror_path, 'HEAD')) as f:
                head_ref = f.read()
        except FileNotFoundError:
            head_ref = ''
        # Check only when HEAD points to master.
        if 'master' in head_ref:
            # Some repos could still have master so verify if the ref exists
            # first.
            show_ref_master_cmd = subprocess.run([
                Mirror.git_exe, '--git-dir', self.mirror_path, 'show-ref',
                '--verify', 'refs/heads/master'
            ])
            if show_ref_master_cmd.returncode != 0:
                # Remove mirror
                gclient_utils.rmtree(self.mirror_path)

                # force bootstrap
                force = True

        should_bootstrap = (force or not self.exists()
                            or len(pack_files) > GC_AUTOPACKLIMIT
                            or len(pack_files) == 0)

        if not should_bootstrap:
            if depth and os.path.exists(
                    os.path.join(self.mirror_path, 'shallow')):
                logging.warning(
                    'Shallow fetch requested, but repo cache already exists.')
            return

        if not self.exists():
            if os.path.exists(self.mirror_path):
                # If the mirror path exists but self.exists() returns false,
                # we're in an unexpected state. Nuke the previous mirror
                # directory and start fresh.
                gclient_utils.rmtree(self.mirror_path)
            os.mkdir(self.mirror_path)
        elif not reset_fetch_config:
            # Re-bootstrapping an existing mirror; preserve existing fetch spec.
            self._preserve_fetchspec()

        bootstrapped = (not depth and bootstrap
                        and self.bootstrap_repo(self.mirror_path))

        if not bootstrapped:
            if not self.exists() or not self.supported_project():
                # Bootstrap failed due to:
                # 1. No previous cache.
                # 2. Project doesn't have a bootstrap folder.
                # Start with a bare git dir.
                self.RunGit(['init', '--bare'])
                with open(self._init_sentient_file, 'w'):
                    # Create sentient file; removed by _fetch on success so a
                    # crash mid-init is detectable later.
                    pass
                self._set_symbolic_ref()
            else:
                # Bootstrap failed, previous cache exists; warn and continue.
                logging.warning(
                    'Git cache has a lot of pack files (%d). Tried to '
                    're-bootstrap but failed. Continuing with non-optimized '
                    'repository.' % len(pack_files))
  379. def _set_symbolic_ref(self):
  380. remote_info = gclient_utils.exponential_backoff_retry(
  381. lambda: subprocess.check_output([
  382. self.git_exe, '--git-dir',
  383. os.path.abspath(self.mirror_path), 'remote', 'show', self.url
  384. ],
  385. cwd=self.mirror_path).decode(
  386. 'utf-8', 'ignore').strip())
  387. default_branch_regexp = re.compile(r'HEAD branch: (.*)')
  388. m = default_branch_regexp.search(remote_info, re.MULTILINE)
  389. if m:
  390. self.RunGit(['symbolic-ref', 'HEAD', 'refs/heads/' + m.groups()[0]])
    def _fetch(self,
               verbose,
               depth,
               no_fetch_tags,
               reset_fetch_config,
               prune=True):
        """Fetch every configured fetch spec and extra commit from origin.

        Raises:
            ClobberNeeded: when the all-heads spec fails to fetch, which is
                treated as a corrupted cache; failures of other specs or
                commits only log a warning.
        """
        self.config(reset_fetch_config)

        fetch_cmd = ['fetch']
        if verbose:
            fetch_cmd.extend(['-v', '--progress'])
        if depth:
            fetch_cmd.extend(['--depth', str(depth)])
        if no_fetch_tags:
            fetch_cmd.append('--no-tags')
        if prune:
            fetch_cmd.append('--prune')
        fetch_cmd.append('origin')

        # Re-read the specs from the repo config rather than trusting
        # self.fetch_specs: config() may have merged in preserved entries.
        fetch_specs = subprocess.check_output(
            [
                self.git_exe, '--git-dir',
                os.path.abspath(self.mirror_path), 'config', '--get-all',
                'remote.origin.fetch'
            ],
            cwd=self.mirror_path).decode('utf-8',
                                         'ignore').strip().splitlines()

        for spec in fetch_specs:
            try:
                self.print('Fetching %s' % spec)
                with self.print_duration_of('fetch %s' % spec):
                    self.RunGit(fetch_cmd + [spec], retry=True)
            except subprocess.CalledProcessError:
                if spec == '+refs/heads/*:refs/heads/*':
                    raise ClobberNeeded()  # Corrupted cache.
                logging.warning('Fetch of %s failed' % spec)

        for commit in self.fetch_commits:
            self.print('Fetching %s' % commit)
            try:
                with self.print_duration_of('fetch %s' % commit):
                    self.RunGit(['fetch', 'origin', commit], retry=True)
            except subprocess.CalledProcessError:
                logging.warning('Fetch of %s failed' % commit)

        # Initialization completed successfully; drop the marker so future
        # runs don't treat this mirror as half-initialized.
        if os.path.isfile(self._init_sentient_file):
            os.remove(self._init_sentient_file)

        # Since --prune is used, it's possible that HEAD no longer exists (e.g.
        # a repo uses new HEAD and old is removed). This ensures that HEAD still
        # points to a valid commit, otherwise gets a new HEAD.
        out = self.RunGit(['rev-parse', 'HEAD'], print_stdout=False)
        # 'rev-parse HEAD' echoes the literal b'HEAD...' when it is unresolved.
        if out.startswith(b'HEAD'):
            self._set_symbolic_ref()
    def populate(self,
                 depth=None,
                 no_fetch_tags=False,
                 shallow=False,
                 bootstrap=False,
                 verbose=False,
                 lock_timeout=0,
                 reset_fetch_config=False):
        """Create or update this mirror, bootstrapping first when needed.

        Holds the mirror's lockfile for the duration.  On a corrupt cache
        (ClobberNeeded, or a leftover init marker from a crashed run) the
        mirror is wiped and bootstrapped again, with force on the retry.
        """
        assert self.GetCachePath()
        if shallow and not depth:
            # 10000 commits is deep enough for most practical shallow uses.
            depth = 10000
        gclient_utils.safe_makedirs(self.GetCachePath())

        def bootstrap_cache(force=False):
            self._ensure_bootstrapped(depth,
                                      bootstrap,
                                      reset_fetch_config,
                                      force=force)
            self._fetch(verbose, depth, no_fetch_tags, reset_fetch_config)

        def wipe_cache():
            self.print(GIT_CACHE_CORRUPT_MESSAGE)
            gclient_utils.rmtree(self.mirror_path)

        with lockfile.lock(self.mirror_path, lock_timeout):
            if os.path.isfile(self._init_sentient_file):
                # Previous bootstrap didn't finish
                wipe_cache()
            try:
                bootstrap_cache()
            except ClobberNeeded:
                # This is a major failure, we need to clean and force a
                # bootstrap.
                wipe_cache()
                bootstrap_cache(force=True)
    def update_bootstrap(self, prune=False, gc_aggressive=False):
        """Upload the current mirror to Google Storage as a new bootstrap
        generation.

        prune: after uploading, delete older generations in GS except the
            newest previous one (kept so concurrent bootstrappers still have
            a complete snapshot while this upload settles).
        gc_aggressive: run a heavier, memory-bounded repack before uploading.
        """
        # NOTE: There have been cases where repos were being recursively
        # uploaded to google storage. E.g.
        # `<host_url>-<repo>/<gen_number>/<host_url>-<repo>/` in GS and
        # <host_url>-<repo>/<host_url>-<repo>/ on the bot. Check for recursed
        # files on the bot here and remove them if found before we upload to GS.
        # See crbug.com/1370443; keep this check until root cause is found.
        recursed_dir = os.path.join(self.mirror_path,
                                    self.mirror_path.split(os.path.sep)[-1])
        if os.path.exists(recursed_dir):
            self.print('Deleting unexpected directory: %s' % recursed_dir)
            gclient_utils.rmtree(recursed_dir)

        # The folder is <git number>
        gen_number = subprocess.check_output(
            [self.git_exe, '--git-dir', self.mirror_path,
             'number']).decode('utf-8', 'ignore').strip()
        gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)

        dest_prefix = '%s/%s' % (self._gs_path, gen_number)

        # ls_out lists contents in the format: gs://blah/blah/123...
        self.print('running "gsutil ls %s":' % self._gs_path)
        ls_code, ls_out, ls_error = gsutil.check_call_with_retries(
            'ls', self._gs_path)
        if ls_code != 0:
            self.print(ls_error)
        else:
            self.print(ls_out)

        # Check to see if folder already exists in gs
        ls_out_set = set(ls_out.strip().splitlines())
        if (dest_prefix + '/' in ls_out_set
                and dest_prefix + '.ready' in ls_out_set):
            print('Cache %s already exists.' % dest_prefix)
            return

        # Reduce the number of individual files to download & write on disk.
        self.RunGit(['pack-refs', '--all'])

        # Run Garbage Collect to compress packfile.
        gc_args = ['gc', '--prune=all']
        if gc_aggressive:
            # The default "gc --aggressive" is often too aggressive for some
            # machines, since it attempts to create as many threads as there are
            # CPU cores, while not limiting per-thread memory usage, which puts
            # too much pressure on RAM on high-core machines, causing them to
            # thrash. Using lower-level commands gives more control over those
            # settings.

            # This might not be strictly necessary, but it's fast and is
            # normally run by 'gc --aggressive', so it shouldn't hurt.
            self.RunGit(['reflog', 'expire', '--all'])

            # These are the default repack settings for 'gc --aggressive'.
            gc_args = [
                'repack', '-d', '-l', '-f', '--depth=50', '--window=250', '-A',
                '--unpack-unreachable=all'
            ]
            # A 1G memory limit seems to provide comparable pack results as the
            # default, even for our largest repos, while preventing runaway
            # memory (at least on current Chromium builders which have about 4G
            # RAM per core).
            gc_args.append('--window-memory=1g')
            # NOTE: It might also be possible to avoid thrashing with a larger
            # window (e.g. "--window-memory=2g") by limiting the number of
            # threads created (e.g. "--threads=[cores/2]"). Some limited testing
            # didn't show much difference in outcomes on our current repos, but
            # it might be worth trying if the repos grow much larger and the
            # packs don't seem to be getting compressed enough.
        self.RunGit(gc_args)

        self.print('running "gsutil -m rsync -r -d %s %s"' %
                   (self.mirror_path, dest_prefix))
        gsutil.call('-m', 'rsync', '-r', '-d', self.mirror_path, dest_prefix)

        # Create .ready file and upload
        _, ready_file_name = tempfile.mkstemp(suffix='.ready')
        try:
            self.print('running "gsutil cp %s %s.ready"' %
                       (ready_file_name, dest_prefix))
            gsutil.call('cp', ready_file_name, '%s.ready' % (dest_prefix))
        finally:
            os.remove(ready_file_name)

        # remove all other directory/.ready files in the same gs_path
        # except for the directory/.ready file previously created
        # which can be used for bootstrapping while the current one is
        # being uploaded
        if not prune:
            return
        prev_dest_prefix = self._GetMostRecentCacheDirectory(ls_out_set)
        if not prev_dest_prefix:
            return
        for path in ls_out_set:
            if path in (prev_dest_prefix + '/', prev_dest_prefix + '.ready'):
                continue
            if path.endswith('.ready'):
                gsutil.call('rm', path)
                continue
            gsutil.call('-m', 'rm', '-r', path)
  562. @staticmethod
  563. def DeleteTmpPackFiles(path):
  564. pack_dir = os.path.join(path, 'objects', 'pack')
  565. if not os.path.isdir(pack_dir):
  566. return
  567. pack_files = [
  568. f for f in os.listdir(pack_dir)
  569. if f.startswith('.tmp-') or f.startswith('tmp_pack_')
  570. ]
  571. for f in pack_files:
  572. f = os.path.join(pack_dir, f)
  573. try:
  574. os.remove(f)
  575. logging.warning('Deleted stale temporary pack file %s' % f)
  576. except OSError:
  577. logging.warning('Unable to delete temporary pack file %s' % f)
@subcommand.usage('[url of repo to check for caching]')
@metrics.collector.collect_metrics('git cache exists')
def CMDexists(parser, args):
    """Check to see if there already is a cache of the given repo.

    Prints the mirror path and returns 0 when the cache exists; returns 1
    otherwise.
    """
    _, args = parser.parse_args(args)
    if not len(args) == 1:
        parser.error('git cache exists only takes exactly one repo url.')
    url = args[0]
    mirror = Mirror(url)
    if mirror.exists():
        print(mirror.mirror_path)
        return 0
    return 1
@subcommand.usage('[url of repo to create a bootstrap zip file]')
@metrics.collector.collect_metrics('git cache update-bootstrap')
def CMDupdate_bootstrap(parser, args):
    """Create and uploads a bootstrap tarball."""
    # Lets just assert we can't do this on Windows.
    if sys.platform.startswith('win'):
        print('Sorry, update bootstrap will not work on Windows.',
              file=sys.stderr)
        return 1

    if gclient_utils.IsEnvCog():
        print('updating bootstrap is not supported in non-git environment.',
              file=sys.stderr)
        return 1

    parser.add_option('--skip-populate',
                      action='store_true',
                      help='Skips "populate" step if mirror already exists.')
    parser.add_option('--gc-aggressive',
                      action='store_true',
                      help='Run aggressive repacking of the repo.')
    parser.add_option('--prune',
                      action='store_true',
                      help='Prune all other cached bundles of the same repo.')

    # Snapshot the raw args before parse_args consumes the option flags, so
    # CMDpopulate sees the original command line.
    populate_args = args[:]
    options, args = parser.parse_args(args)
    url = args[0]
    mirror = Mirror(url)
    if not options.skip_populate or not mirror.exists():
        CMDpopulate(parser, populate_args)
    else:
        print('Skipped populate step.')

    # Get the repo directory.
    _, args2 = parser.parse_args(args)
    url = args2[0]
    mirror = Mirror(url)
    mirror.update_bootstrap(options.prune, options.gc_aggressive)
    return 0
@subcommand.usage('[url of repo to add to or update in cache]')
@metrics.collector.collect_metrics('git cache populate')
def CMDpopulate(parser, args):
    """Ensure that the cache has all up-to-date objects for the given repo."""
    if gclient_utils.IsEnvCog():
        print('populating cache is not supported in non-git environment.',
              file=sys.stderr)
        return 1
    parser.add_option('--depth',
                      type='int',
                      help='Only cache DEPTH commits of history')
    parser.add_option(
        '--no-fetch-tags',
        action='store_true',
        help=('Don\'t fetch tags from the server. This can speed up '
              'fetch considerably when there are many tags.'))
    parser.add_option('--shallow',
                      '-s',
                      action='store_true',
                      help='Only cache 10000 commits of history')
    parser.add_option('--ref',
                      action='append',
                      help='Specify additional refs to be fetched')
    parser.add_option('--commit',
                      action='append',
                      help='Specify additional commits to be fetched')
    parser.add_option('--no_bootstrap',
                      '--no-bootstrap',
                      action='store_true',
                      help='Don\'t bootstrap from Google Storage')
    parser.add_option('--ignore_locks',
                      '--ignore-locks',
                      action='store_true',
                      help='NOOP. This flag will be removed in the future.')
    parser.add_option(
        '--break-locks',
        action='store_true',
        help='Break any existing lock instead of just ignoring it')
    parser.add_option(
        '--reset-fetch-config',
        action='store_true',
        default=False,
        help='Reset the fetch config before populating the cache.')

    options, args = parser.parse_args(args)
    if not len(args) == 1:
        parser.error('git cache populate only takes exactly one repo url.')
    # Deprecated no-op flags are accepted but warned about.
    if options.ignore_locks:
        print('ignore_locks is no longer used. Please remove its usage.')
    if options.break_locks:
        print('break_locks is no longer used. Please remove its usage.')
    url = args[0]

    mirror = Mirror(url, refs=options.ref, commits=options.commit)
    kwargs = {
        'no_fetch_tags': options.no_fetch_tags,
        'verbose': options.verbose,
        'shallow': options.shallow,
        'bootstrap': not options.no_bootstrap,
        'lock_timeout': options.timeout,
        'reset_fetch_config': options.reset_fetch_config,
    }
    # Only pass depth through when explicitly requested, so populate()'s
    # shallow/depth defaulting still applies.
    if options.depth:
        kwargs['depth'] = options.depth
    mirror.populate(**kwargs)
  690. @subcommand.usage('Fetch new commits into cache and current checkout')
  691. @metrics.collector.collect_metrics('git cache fetch')
  692. def CMDfetch(parser, args):
  693. """Update mirror, and fetch in cwd."""
  694. if gclient_utils.IsEnvCog():
  695. print(
  696. 'fetching new commits into cache is not supported in non-git '
  697. 'environment.',
  698. file=sys.stderr)
  699. return 1
  700. parser.add_option('--all', action='store_true', help='Fetch all remotes')
  701. parser.add_option('--no_bootstrap',
  702. '--no-bootstrap',
  703. action='store_true',
  704. help='Don\'t (re)bootstrap from Google Storage')
  705. parser.add_option(
  706. '--no-fetch-tags',
  707. action='store_true',
  708. help=('Don\'t fetch tags from the server. This can speed up '
  709. 'fetch considerably when there are many tags.'))
  710. options, args = parser.parse_args(args)
  711. # Figure out which remotes to fetch. This mimics the behavior of regular
  712. # 'git fetch'. Note that in the case of "stacked" or "pipelined" branches,
  713. # this will NOT try to traverse up the branching structure to find the
  714. # ultimate remote to update.
  715. remotes = []
  716. if options.all:
  717. assert not args, 'fatal: fetch --all does not take repository argument'
  718. remotes = subprocess.check_output([Mirror.git_exe, 'remote'])
  719. remotes = remotes.decode('utf-8', 'ignore').splitlines()
  720. elif args:
  721. remotes = args
  722. else:
  723. current_branch = subprocess.check_output(
  724. [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD'])
  725. current_branch = current_branch.decode('utf-8', 'ignore').strip()
  726. if current_branch != 'HEAD':
  727. upstream = subprocess.check_output(
  728. [Mirror.git_exe, 'config',
  729. 'branch.%s.remote' % current_branch])
  730. upstream = upstream.decode('utf-8', 'ignore').strip()
  731. if upstream and upstream != '.':
  732. remotes = [upstream]
  733. if not remotes:
  734. remotes = ['origin']
  735. cachepath = Mirror.GetCachePath()
  736. git_dir = os.path.abspath(
  737. subprocess.check_output([Mirror.git_exe, 'rev-parse',
  738. '--git-dir']).decode('utf-8', 'ignore'))
  739. git_dir = os.path.abspath(git_dir)
  740. if git_dir.startswith(cachepath):
  741. mirror = Mirror.FromPath(git_dir)
  742. mirror.populate(bootstrap=not options.no_bootstrap,
  743. no_fetch_tags=options.no_fetch_tags,
  744. lock_timeout=options.timeout)
  745. return 0
  746. for remote in remotes:
  747. remote_url = subprocess.check_output(
  748. [Mirror.git_exe, 'config',
  749. 'remote.%s.url' % remote])
  750. remote_url = remote_url.decode('utf-8', 'ignore').strip()
  751. if remote_url.startswith(cachepath):
  752. mirror = Mirror.FromPath(remote_url)
  753. mirror.print = lambda *args: None
  754. print('Updating git cache...')
  755. mirror.populate(bootstrap=not options.no_bootstrap,
  756. no_fetch_tags=options.no_fetch_tags,
  757. lock_timeout=options.timeout)
  758. subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  759. return 0
  760. class OptionParser(optparse.OptionParser):
  761. """Wrapper class for OptionParser to handle global options."""
  762. def __init__(self, *args, **kwargs):
  763. optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
  764. self.add_option(
  765. '-c',
  766. '--cache-dir',
  767. help=('Path to the directory containing the caches. Normally '
  768. 'deduced from git config cache.cachepath or '
  769. '$GIT_CACHE_PATH.'))
  770. self.add_option(
  771. '-v',
  772. '--verbose',
  773. action='count',
  774. default=1,
  775. help='Increase verbosity (can be passed multiple times)')
  776. self.add_option('-q',
  777. '--quiet',
  778. action='store_true',
  779. help='Suppress all extraneous output')
  780. self.add_option('--timeout',
  781. type='int',
  782. default=0,
  783. help='Timeout for acquiring cache lock, in seconds')
  784. def parse_args(self, args=None, values=None):
  785. # Create an optparse.Values object that will store only the actual
  786. # passed options, without the defaults.
  787. actual_options = optparse.Values()
  788. _, args = optparse.OptionParser.parse_args(self, args, actual_options)
  789. # Create an optparse.Values object with the default options.
  790. options = optparse.Values(self.get_default_values().__dict__)
  791. # Update it with the options passed by the user.
  792. options._update_careful(actual_options.__dict__)
  793. # Store the options passed by the user in an _actual_options attribute.
  794. # We store only the keys, and not the values, since the values can
  795. # contain arbitrary information, which might be PII.
  796. metrics.collector.add('arguments', list(actual_options.__dict__.keys()))
  797. if options.quiet:
  798. options.verbose = 0
  799. levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
  800. logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])
  801. try:
  802. global_cache_dir = Mirror.GetCachePath()
  803. except RuntimeError:
  804. global_cache_dir = None
  805. if options.cache_dir:
  806. if global_cache_dir and (os.path.abspath(options.cache_dir) !=
  807. os.path.abspath(global_cache_dir)):
  808. logging.warning(
  809. 'Overriding globally-configured cache directory.')
  810. Mirror.SetCachePath(options.cache_dir)
  811. return options, args
  812. def main(argv):
  813. dispatcher = subcommand.CommandDispatcher(__name__)
  814. return dispatcher.execute(OptionParser(), argv)
if __name__ == '__main__':
    try:
        # NOTE(review): sys.exit() raises SystemExit *inside* the context
        # manager; print_notice_and_exit() presumably intercepts it so the
        # metrics notice can be shown before exiting — confirm in metrics.py.
        with metrics.collector.print_notice_and_exit():
            sys.exit(main(sys.argv[1:]))
    except KeyboardInterrupt:
        # On Ctrl-C, exit with status 1 and a short message instead of a
        # traceback.
        sys.stderr.write('interrupted\n')
        sys.exit(1)