Browse Source

Revert "git_cache: Remove locks"

This reverts commit c3eb3fa33551c957d0179472c908864da016d95a.

Reason for revert: lots of "runhooks" failure everywhere

Example: https://build.chromium.org/p/chromium.linux/builders/Linux%20Builder/builds/92720

Original change's description:
> git_cache: Remove locks
> 
> These aren't in use, and the original problem they were
> meant to solve has been solved at the gclient.py layer 
> using resource locking:
>   https://codereview.chromium.org/2049583003
> 
> Bug: 773008
> Change-Id: I6609f39d7f15604e0bb3d742a41c4f9fec87a57a
> Reviewed-on: https://chromium-review.googlesource.com/707728
> Reviewed-by: Aaron Gable <agable@chromium.org>
> Reviewed-by: Robbie Iannucci <iannucci@chromium.org>
> Commit-Queue: Ryan Tseng <hinoka@chromium.org>

TBR=iannucci@chromium.org,hinoka@chromium.org,agable@chromium.org,phajdan.jr@chromium.org

Change-Id: I31d5fef94f39f3a9f97b9e59121073b1f433d11e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: 773008
Reviewed-on: https://chromium-review.googlesource.com/711054
Reviewed-by: Vadim Shtayura <vadimsh@chromium.org>
Commit-Queue: Vadim Shtayura <vadimsh@chromium.org>
Vadim Shtayura 7 years ago
parent
commit
08049e2db2

+ 9 - 0
gclient.py

@@ -1308,6 +1308,10 @@ it or fix the checkout.
     if cache_dir:
       cache_dir = os.path.join(self.root_dir, cache_dir)
       cache_dir = os.path.abspath(cache_dir)
+      # If running on a bot, force break any stale git cache locks.
+      if os.path.exists(cache_dir) and os.environ.get('CHROME_HEADLESS'):
+        subprocess2.check_call(['git', 'cache', 'unlock', '--cache-dir',
+                                cache_dir, '--force', '--all'])
     gclient_scm.GitWrapper.cache_dir = cache_dir
     git_cache.Mirror.SetCachePath(cache_dir)
 
@@ -2350,11 +2354,16 @@ def CMDsync(parser, args):
   parser.add_option('--no_bootstrap', '--no-bootstrap',
                     action='store_true',
                     help='Don\'t bootstrap from Google Storage.')
+  parser.add_option('--ignore_locks', action='store_true',
+                    help='GIT ONLY - Ignore cache locks.')
   parser.add_option('--break_repo_locks', action='store_true',
                     help='GIT ONLY - Forcibly remove repo locks (e.g. '
                       'index.lock). This should only be used if you know for '
                       'certain that this invocation of gclient is the only '
                       'thing operating on the git repos (e.g. on a bot).')
+  parser.add_option('--lock_timeout', type='int', default=5000,
+                    help='GIT ONLY - Deadline (in seconds) to wait for git '
+                         'cache lock to become available. Default is %default.')
   # TODO(agable): Remove these when the oldest CrOS release milestone is M56.
   parser.add_option('-t', '--transitive', action='store_true',
                     help='DEPRECATED: This is a no-op.')

+ 4 - 1
gclient_scm.py

@@ -855,7 +855,10 @@ class GitWrapper(SCMWrapper):
       depth = None
     mirror.populate(verbose=options.verbose,
                     bootstrap=not getattr(options, 'no_bootstrap', False),
-                    depth=depth)
+                    depth=depth,
+                    ignore_lock=getattr(options, 'ignore_locks', False),
+                    lock_timeout=getattr(options, 'lock_timeout', 0))
+    mirror.unlock()
 
   def _Clone(self, revision, url, options):
     """Clone a git repository from the given URL.

+ 217 - 15
git_cache.py

@@ -35,6 +35,9 @@ except NameError:
   class WinErr(Exception):
     pass
 
+class LockError(Exception):
+  pass
+
 class ClobberNeeded(Exception):
   pass
 
@@ -74,11 +77,122 @@ def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
       sleep_time *= 2
 
 
+class Lockfile(object):
+  """Class to represent a cross-platform process-specific lockfile."""
+
+  def __init__(self, path, timeout=0):
+    self.path = os.path.abspath(path)
+    self.timeout = timeout
+    self.lockfile = self.path + ".lock"
+    self.pid = os.getpid()
+
+  def _read_pid(self):
+    """Read the pid stored in the lockfile.
+
+    Note: This method is potentially racy. By the time it returns the lockfile
+    may have been unlocked, removed, or stolen by some other process.
+    """
+    try:
+      with open(self.lockfile, 'r') as f:
+        pid = int(f.readline().strip())
+    except (IOError, ValueError):
+      pid = None
+    return pid
+
+  def _make_lockfile(self):
+    """Safely creates a lockfile containing the current pid."""
+    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
+    fd = os.open(self.lockfile, open_flags, 0o644)
+    f = os.fdopen(fd, 'w')
+    print(self.pid, file=f)
+    f.close()
+
+  def _remove_lockfile(self):
+    """Delete the lockfile. Complains (implicitly) if it doesn't exist.
+
+    See gclient_utils.py:rmtree docstring for more explanation on the
+    windows case.
+    """
+    if sys.platform == 'win32':
+      lockfile = os.path.normcase(self.lockfile)
+
+      def delete():
+        exitcode = subprocess.call(['cmd.exe', '/c',
+                                    'del', '/f', '/q', lockfile])
+        if exitcode != 0:
+          raise LockError('Failed to remove lock: %s' % (lockfile,))
+      exponential_backoff_retry(
+          delete,
+          excs=(LockError,),
+          name='del [%s]' % (lockfile,))
+    else:
+      os.remove(self.lockfile)
+
+  def lock(self):
+    """Acquire the lock.
+
+    This will block with a deadline of self.timeout seconds.
+    """
+    elapsed = 0
+    while True:
+      try:
+        self._make_lockfile()
+        return
+      except OSError as e:
+        if elapsed < self.timeout:
+          sleep_time = max(10, min(3, self.timeout - elapsed))
+          logging.info('Could not create git cache lockfile; '
+                       'will retry after sleep(%d).', sleep_time);
+          elapsed += sleep_time
+          time.sleep(sleep_time)
+          continue
+        if e.errno == errno.EEXIST:
+          raise LockError("%s is already locked" % self.path)
+        else:
+          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))
+
+  def unlock(self):
+    """Release the lock."""
+    try:
+      if not self.is_locked():
+        raise LockError("%s is not locked" % self.path)
+      if not self.i_am_locking():
+        raise LockError("%s is locked, but not by me" % self.path)
+      self._remove_lockfile()
+    except WinErr:
+      # Windows is unreliable when it comes to file locking.  YMMV.
+      pass
+
+  def break_lock(self):
+    """Remove the lock, even if it was created by someone else."""
+    try:
+      self._remove_lockfile()
+      return True
+    except OSError as exc:
+      if exc.errno == errno.ENOENT:
+        return False
+      else:
+        raise
+
+  def is_locked(self):
+    """Test if the file is locked by anyone.
+
+    Note: This method is potentially racy. By the time it returns the lockfile
+    may have been unlocked, removed, or stolen by some other process.
+    """
+    return os.path.exists(self.lockfile)
+
+  def i_am_locking(self):
+    """Test if the file is locked by this process."""
+    return self.is_locked() and self.pid == self._read_pid()
+
+
 class Mirror(object):
 
   git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
   gsutil_exe = os.path.join(
     os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
+  cachepath_lock = threading.Lock()
 
   @staticmethod
   def parse_fetch_spec(spec):
@@ -140,21 +254,23 @@ class Mirror(object):
 
   @classmethod
   def SetCachePath(cls, cachepath):
-    setattr(cls, 'cachepath', cachepath)
+    with cls.cachepath_lock:
+      setattr(cls, 'cachepath', cachepath)
 
   @classmethod
   def GetCachePath(cls):
-    if not hasattr(cls, 'cachepath'):
-      try:
-        cachepath = subprocess.check_output(
-            [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip()
-      except subprocess.CalledProcessError:
-        cachepath = None
-      if not cachepath:
-        raise RuntimeError(
-            'No global cache.cachepath git configuration found.')
-      setattr(cls, 'cachepath', cachepath)
-    return getattr(cls, 'cachepath')
+    with cls.cachepath_lock:
+      if not hasattr(cls, 'cachepath'):
+        try:
+          cachepath = subprocess.check_output(
+              [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip()
+        except subprocess.CalledProcessError:
+          cachepath = None
+        if not cachepath:
+          raise RuntimeError(
+              'No global cache.cachepath git configuration found.')
+        setattr(cls, 'cachepath', cachepath)
+      return getattr(cls, 'cachepath')
 
   def Rename(self, src, dst):
     # This is somehow racy on Windows.
@@ -371,12 +487,17 @@ class Mirror(object):
           raise ClobberNeeded()  # Corrupted cache.
         logging.warn('Fetch of %s failed' % spec)
 
-  def populate(self, depth=None, shallow=False, bootstrap=False, verbose=False):
+  def populate(self, depth=None, shallow=False, bootstrap=False,
+               verbose=False, ignore_lock=False, lock_timeout=0):
     assert self.GetCachePath()
     if shallow and not depth:
       depth = 10000
     gclient_utils.safe_makedirs(self.GetCachePath())
 
+    lockfile = Lockfile(self.mirror_path, lock_timeout)
+    if not ignore_lock:
+      lockfile.lock()
+
     tempdir = None
     try:
       tempdir = self._ensure_bootstrapped(depth, bootstrap)
@@ -394,6 +515,8 @@ class Mirror(object):
         if os.path.exists(self.mirror_path):
           gclient_utils.rmtree(self.mirror_path)
         self.Rename(tempdir, self.mirror_path)
+      if not ignore_lock:
+        lockfile.unlock()
 
   def update_bootstrap(self, prune=False):
     # The files are named <git number>.zip
@@ -434,6 +557,45 @@ class Mirror(object):
       except OSError:
         logging.warn('Unable to delete temporary pack file %s' % f)
 
+  @classmethod
+  def BreakLocks(cls, path):
+    did_unlock = False
+    lf = Lockfile(path)
+    if lf.break_lock():
+      did_unlock = True
+    # Look for lock files that might have been left behind by an interrupted
+    # git process.
+    lf = os.path.join(path, 'config.lock')
+    if os.path.exists(lf):
+      os.remove(lf)
+      did_unlock = True
+    cls.DeleteTmpPackFiles(path)
+    return did_unlock
+
+  def unlock(self):
+    return self.BreakLocks(self.mirror_path)
+
+  @classmethod
+  def UnlockAll(cls):
+    cachepath = cls.GetCachePath()
+    if not cachepath:
+      return
+    dirlist = os.listdir(cachepath)
+    repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
+                     if os.path.isdir(os.path.join(cachepath, path))])
+    for dirent in dirlist:
+      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
+        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
+      elif (dirent.endswith('.lock') and
+          os.path.isfile(os.path.join(cachepath, dirent))):
+        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))
+
+    unlocked_repos = []
+    for repo_dir in repo_dirs:
+      if cls.BreakLocks(repo_dir):
+        unlocked_repos.append(repo_dir)
+
+    return unlocked_repos
 
 @subcommand.usage('[url of repo to check for caching]')
 def CMDexists(parser, args):
@@ -485,6 +647,9 @@ def CMDpopulate(parser, args):
   parser.add_option('--no_bootstrap', '--no-bootstrap',
                     action='store_true',
                     help='Don\'t bootstrap from Google Storage')
+  parser.add_option('--ignore_locks', '--ignore-locks',
+                    action='store_true',
+                    help='Don\'t try to lock repository')
 
   options, args = parser.parse_args(args)
   if not len(args) == 1:
@@ -496,6 +661,8 @@ def CMDpopulate(parser, args):
       'verbose': options.verbose,
       'shallow': options.shallow,
       'bootstrap': not options.no_bootstrap,
+      'ignore_lock': options.ignore_locks,
+      'lock_timeout': options.timeout,
   }
   if options.depth:
     kwargs['depth'] = options.depth
@@ -540,7 +707,7 @@ def CMDfetch(parser, args):
   if git_dir.startswith(cachepath):
     mirror = Mirror.FromPath(git_dir)
     mirror.populate(
-        bootstrap=not options.no_bootstrap,)
+        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
     return 0
   for remote in remotes:
     remote_url = subprocess.check_output(
@@ -550,11 +717,44 @@ def CMDfetch(parser, args):
       mirror.print = lambda *args: None
       print('Updating git cache...')
       mirror.populate(
-          bootstrap=not options.no_bootstrap)
+          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
     subprocess.check_call([Mirror.git_exe, 'fetch', remote])
   return 0
 
 
+@subcommand.usage('[url of repo to unlock, or -a|--all]')
+def CMDunlock(parser, args):
+  """Unlock one or all repos if their lock files are still around."""
+  parser.add_option('--force', '-f', action='store_true',
+                    help='Actually perform the action')
+  parser.add_option('--all', '-a', action='store_true',
+                    help='Unlock all repository caches')
+  options, args = parser.parse_args(args)
+  if len(args) > 1 or (len(args) == 0 and not options.all):
+    parser.error('git cache unlock takes exactly one repo url, or --all')
+
+  if not options.force:
+    cachepath = Mirror.GetCachePath()
+    lockfiles = [os.path.join(cachepath, path)
+                 for path in os.listdir(cachepath)
+                 if path.endswith('.lock') and os.path.isfile(path)]
+    parser.error('git cache unlock requires -f|--force to do anything. '
+                 'Refusing to unlock the following repo caches: '
+                 ', '.join(lockfiles))
+
+  unlocked_repos = []
+  if options.all:
+    unlocked_repos.extend(Mirror.UnlockAll())
+  else:
+    m = Mirror(args[0])
+    if m.unlock():
+      unlocked_repos.append(m.mirror_path)
+
+  if unlocked_repos:
+    logging.info('Broke locks on these caches:\n  %s' % '\n  '.join(
+        unlocked_repos))
+
+
 class OptionParser(optparse.OptionParser):
   """Wrapper class for OptionParser to handle global options."""
 
@@ -566,6 +766,8 @@ class OptionParser(optparse.OptionParser):
                     help='Increase verbosity (can be passed multiple times)')
     self.add_option('-q', '--quiet', action='store_true',
                     help='Suppress all extraneous output')
+    self.add_option('--timeout', type='int', default=0,
+                    help='Timeout for acquiring cache lock, in seconds')
 
   def parse_args(self, args=None, values=None):
     options, args = optparse.OptionParser.parse_args(self, args, values)

+ 41 - 2
recipes/recipe_modules/bot_update/resources/bot_update.py

@@ -331,7 +331,7 @@ def gclient_sync(
   os.close(fd)
 
   args = ['sync', '--verbose', '--reset', '--force',
-         '--output-json', gclient_output_file,
+         '--ignore_locks', '--output-json', gclient_output_file,
          '--nohooks', '--noprehooks', '--delete_unversioned_trees']
   if with_branch_heads:
     args += ['--with_branch_heads']
@@ -470,9 +470,36 @@ def is_broken_repo_dir(repo_dir):
   return not path.exists(os.path.join(repo_dir, '.git', 'config'))
 
 
+def _maybe_break_locks(checkout_path):
+  """This removes all .lock files from this repo's .git directory.
+
+  In particular, this will cleanup index.lock files, as well as ref lock
+  files.
+  """
+  git_dir = os.path.join(checkout_path, '.git')
+  for dirpath, _, filenames in os.walk(git_dir):
+    for filename in filenames:
+      if filename.endswith('.lock'):
+        to_break = os.path.join(dirpath, filename)
+        print 'breaking lock: %s' % to_break
+        try:
+          os.remove(to_break)
+        except OSError as ex:
+          print 'FAILED to break lock: %s: %s' % (to_break, ex)
+          raise
+
+
 def git_checkout(solutions, revisions, shallow, refs, git_cache_dir,
                  cleanup_dir):
   build_dir = os.getcwd()
+  # Before we do anything, break all git_cache locks.
+  if path.isdir(git_cache_dir):
+    git('cache', 'unlock', '-vv', '--force', '--all',
+        '--cache-dir', git_cache_dir)
+    for item in os.listdir(git_cache_dir):
+      filename = os.path.join(git_cache_dir, item)
+      if item.endswith('.lock'):
+        raise Exception('%s exists after cache unlock' % filename)
   first_solution = True
   for sln in solutions:
     # Just in case we're hitting a different git server than the one from
@@ -489,7 +516,7 @@ def git_checkout(solutions, revisions, shallow, refs, git_cache_dir,
         shallow = False
       sln_dir = path.join(build_dir, name)
       s = ['--shallow'] if shallow else []
-      populate_cmd = (['cache', 'populate', '-v',
+      populate_cmd = (['cache', 'populate', '--ignore_locks', '-v',
                        '--cache-dir', git_cache_dir] + s + [url])
       for ref in refs:
         populate_cmd.extend(['--ref', ref])
@@ -517,6 +544,18 @@ def git_checkout(solutions, revisions, shallow, refs, git_cache_dir,
           refspec = '%s:%s' % (ref, ref.lstrip('+'))
           git('fetch', 'origin', refspec, cwd=sln_dir)
 
+        # Windows sometimes has trouble deleting files.
+        # This can make git commands that rely on locks fail.
+        # Try a few times in case Windows has trouble again (and again).
+        if sys.platform.startswith('win'):
+          tries = 3
+          while tries:
+            try:
+              _maybe_break_locks(sln_dir)
+              break
+            except Exception:
+              tries -= 1
+
         revision = get_target_revision(name, url, revisions) or 'HEAD'
         force_revision(sln_dir, revision)
         done = True

+ 14 - 0
tests/bot_update_coverage_test.py

@@ -231,6 +231,20 @@ class BotUpdateUnittests(unittest.TestCase):
         gclient_sync_cmd = args
     self.assertTrue('--break_repo_locks' in gclient_sync_cmd)
 
+  def testGitCheckoutBreaksLocks(self):
+    self.overrideSetupForWindows()
+    path = '/b/build/slave/foo/build/.git'
+    lockfile = 'index.lock'
+    removed = []
+    old_os_walk = os.walk
+    old_os_remove = os.remove
+    setattr(os, 'walk', lambda _: [(path, None, [lockfile])])
+    setattr(os, 'remove', removed.append)
+    bot_update.ensure_checkout(**self.params)
+    setattr(os, 'walk', old_os_walk)
+    setattr(os, 'remove', old_os_remove)
+    self.assertTrue(os.path.join(path, lockfile) in removed)
+
 
 if __name__ == '__main__':
   unittest.main()