فهرست منبع

[gclient] Read submodule status information

This allow us to skip sync if we know the state is correct.

R=gavinmak@google.com

Bug: 40283612, 40942309
Change-Id: I30fd5bfb9ca8ab0f7dcce567e2a5cb4aebdc7b2f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5480172
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
Reviewed-by: Gavin Mak <gavinmak@google.com>
Josip Sokcevic 1 سال پیش
والد
کامیت
39501ba8c9
3فایلهای تغییر یافته به همراه208 افزوده شده و 29 حذف شده
  1. 59 11
      gclient.py
  2. 77 3
      gclient_scm.py
  3. 72 15
      tests/gclient_scm_test.py

+ 59 - 11
gclient.py

@@ -295,7 +295,19 @@ class DependencySettings(object):
         # These are not mutable:
         self._parent = parent
         self._deps_file = deps_file
-        self._url = url
+
+        # Post process the url to remove trailing slashes.
+        if isinstance(url, str):
+            # urls are sometime incorrectly written as proto://host/path/@rev.
+            # Replace it to proto://host/path@rev.
+            self._url = url.replace('/@', '@')
+        elif isinstance(url, (None.__class__)):
+            self._url = url
+        else:
+            raise gclient_utils.Error(
+                ('dependency url must be either string or None, '
+                 'instead of %s') % url.__class__.__name__)
+
         # The condition as string (or None). Useful to keep e.g. for flatten.
         self._condition = condition
         # 'managed' determines whether or not this dependency is synced/updated
@@ -321,16 +333,6 @@ class DependencySettings(object):
         self._custom_deps = custom_deps or {}
         self._custom_hooks = custom_hooks or []
 
-        # Post process the url to remove trailing slashes.
-        if isinstance(self.url, str):
-            # urls are sometime incorrectly written as proto://host/path/@rev.
-            # Replace it to proto://host/path@rev.
-            self.set_url(self.url.replace('/@', '@'))
-        elif not isinstance(self.url, (None.__class__)):
-            raise gclient_utils.Error(
-                ('dependency url must be either string or None, '
-                 'instead of %s') % self.url.__class__.__name__)
-
         # Make any deps_file path platform-appropriate.
         if self._deps_file:
             for sep in ['/', '\\']:
@@ -485,6 +487,9 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
         # custom_deps, if any.
         self._should_sync = True
 
+        self._known_dependency_diff = None
+        self._dependency_index_state = None
+
         self._OverrideUrl()
         # This is inherited from WorkItem.  We want the URL to be a resource.
         if self.url and isinstance(self.url, str):
@@ -588,6 +593,14 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
         ])
         return s
 
+    @property
+    def known_dependency_diff(self):
+        return self._known_dependency_diff
+
+    @property
+    def dependency_index_state(self):
+        return self._dependency_index_state
+
     @property
     def requirements(self):
         """Calculate the list of requirements."""
@@ -929,6 +942,12 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
         if self.git_dependencies_state == gclient_eval.SUBMODULES:
             deps.update(self.ParseGitSubmodules())
 
+        if self.git_dependencies_state != gclient_eval.DEPS:
+            # Git submodules are used - get their state.
+            self._known_dependency_diff = self.CreateSCM().GetSubmoduleDiff()
+            self._dependency_index_state = self.CreateSCM(
+            ).GetSubmoduleStateFromIndex()
+
         deps_to_add = self._deps_to_objects(
             self._postprocess_deps(deps, rel_prefix), self._use_relative_paths)
 
@@ -1156,6 +1175,20 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                 try:
                     start = time.time()
                     sync_status = metrics_utils.SYNC_STATUS_FAILURE
+                    if self.parent and self.parent.known_dependency_diff is not None:
+                        if self._use_relative_paths:
+                            path = self.name
+                        else:
+                            path = self.name[len(self.parent.name) + 1:]
+                        current_revision = None
+                        if path in self.parent.dependency_index_state:
+                            current_revision = self.parent.dependency_index_state[
+                                path]
+                        if path in self.parent.known_dependency_diff:
+                            current_revision = self.parent.known_dependency_diff[
+                                path][1]
+                        self._used_scm.current_revision = current_revision
+
                     self._got_revision = self._used_scm.RunCommand(
                         command, options, args, file_list)
                     latest_commit = self._got_revision
@@ -2120,6 +2153,21 @@ it or fix the checkout.
 
         removed_cipd_entries = []
         read_entries = self._ReadEntries()
+        # Add known dependency state
+        queue = list(self.dependencies)
+        while len(queue) > 0:
+            dep = queue.pop()
+            queue.extend(dep.dependencies)
+            if not dep._known_dependency_diff:
+                continue
+
+            for k, v in dep._known_dependency_diff.items():
+                path = f'{dep.name}/{k}'
+                if path in read_entries:
+                    continue
+                read_entries[path] = f'https://unknown@{v[1]}'
+
+
         # We process entries sorted in reverse to ensure a child dir is
         # always deleted before its parent dir.
         # This is especially important for submodules with pinned revisions

+ 77 - 3
gclient_scm.py

@@ -229,6 +229,7 @@ class GitWrapper(SCMWrapper):
             filter_kwargs['predicate'] = self.out_cb
         self.filter = gclient_utils.GitFilter(**filter_kwargs)
         self._running_under_rosetta = None
+        self.current_revision = None
 
     def GetCheckoutRoot(self):
         return scm.GIT.GetCheckoutRoot(self.checkout_path)
@@ -250,6 +251,69 @@ class GitWrapper(SCMWrapper):
                     ['-c', 'core.quotePath=false', 'diff', '--name-only', base])
             )).split()
 
+    def GetSubmoduleStateFromIndex(self):
+        """Returns a map where keys are submodule names and values are commit
+        hashes. It reads data from the Git index, so only committed values are
+        present."""
+        out = self._Capture(['ls-files', '-s'])
+        result = {}
+        for l in out.split('\n'):
+            if not l.startswith('160000'):
+                # Not a submodule
+                continue
+            (_, commit, _, filepath) = l.split(maxsplit=3)
+            result[filepath] = commit
+        return result
+
+    def GetSubmoduleDiff(self):
+        """Returns a map where keys are submodule names and values are tuples of
+        (old_commit_hash, new_commit_hash). old_commit_hash matches the Git
+        index, whereas new_commit_hash matches currently checked out commit
+        hash."""
+        out = self._Capture([
+            'diff', '--no-prefix', '--no-ext-diff', '--no-color',
+            '--ignore-submodules=dirty', '--submodule=short'
+        ])
+        NO_COMMIT = 40 * '0'
+        committed_submodule = None
+        checked_submodule = None
+        filepath = None
+        state = 0
+        diff = {}
+        # Parsing git diff uses simple state machine. States:
+        # 0 - start state
+        # 1 - diff file/line detected, ready to process content
+        # 2 - gitlink detected, ready to process gitlink past and current
+        # content.
+        # 3 - past gitlink content detected. It contains a commit hash that's in
+        # git index.
+        # 4 - new gitlink content detected. It contains currently checked
+        # commit. At this point, we have all information needed, and we can
+        # reset state to 0.
+        for l in out.split('\n'):
+            if l.startswith('diff --git'):
+                # New file detected, reset state.
+                state = 1
+            elif state == 1 and l.startswith('index') and l.endswith('160000'):
+                # We detected gitlink
+                state = 2
+            elif state == 2 and l.startswith('+++ '):
+                # This line contains filename
+                filepath = l[4:]
+                state = 3
+            elif state == 3 and l.startswith('-Subproject commit '):
+                # This line contains what commit hash Git index expects
+                # (ls-files).
+                committed_submodule = l.split(' ')[-1]
+                state = 4
+            elif state == 4 and l.startswith('+Subproject commit '):
+                # This line contains currently checked out commit for this submodule.
+                checked_submodule = l.split(' ')[-1]
+                if NO_COMMIT not in (committed_submodule, checked_submodule):
+                    diff[filepath] = (committed_submodule, checked_submodule)
+                state = 0
+        return diff
+
     def diff(self, options, _args, _file_list):
         _, revision = gclient_utils.SplitUrlRevision(self.url)
         if not revision:
@@ -638,7 +702,6 @@ class GitWrapper(SCMWrapper):
             raise gclient_utils.Error("Unsupported argument(s): %s" %
                                       ",".join(args))
 
-        current_revision = None
         url, deps_revision = gclient_utils.SplitUrlRevision(self.url)
         revision = deps_revision
         managed = True
@@ -714,11 +777,11 @@ class GitWrapper(SCMWrapper):
                 self._UpdateMirrorIfNotContains(mirror, options, rev_type,
                                                 revision)
             try:
-                current_revision = self._Clone(revision, url, options)
+                self.current_revision = self._Clone(revision, url, options)
             except subprocess2.CalledProcessError as e:
                 logging.warning('Clone failed due to: %s', e)
                 self._DeleteOrMove(options.force)
-                current_revision = self._Clone(revision, url, options)
+                self.current_revision = self._Clone(revision, url, options)
             if file_list is not None:
                 files = self._Capture(
                     ['-c', 'core.quotePath=false', 'ls-files']).splitlines()
@@ -742,6 +805,17 @@ class GitWrapper(SCMWrapper):
                        self.relpath)
             return self._Capture(['rev-parse', '--verify', 'HEAD'])
 
+        # Special case for rev_type = hash. If we use submodules, we can check
+        # information already.
+        if rev_type == 'hash':
+            if self.current_revision == revision:
+                if verbose:
+                    self.Print('Using submodule information to skip check')
+                if options.reset or options.force:
+                    self._Scrub('HEAD', options)
+
+                return revision
+
         self._maybe_break_locks(options)
 
         if mirror:

+ 72 - 15
tests/gclient_scm_test.py

@@ -139,6 +139,7 @@ Personalized
 from :3
 M 100644 :4 a
 M 100644 :5 b
+M 160000 1111111111111111111111111111111111111111 submodule
 
 blob
 mark :7
@@ -267,7 +268,7 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
         scm.revert(options, self.args, file_list)
         self.assertEqual(file_list, [])
         self.assertEqual(scm.revinfo(options, self.args, None),
-                         'a7142dc9f0009350b96a11f372b6ea658592aa95')
+                         '4091c7d010ca99d0f2dd416d4b70b758ae432187')
         sys.stdout.close()
 
     def testRevertModified(self):
@@ -287,7 +288,7 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
         scm.diff(options, self.args, file_list)
         self.assertEqual(file_list, [])
         self.assertEqual(scm.revinfo(options, self.args, None),
-                         'a7142dc9f0009350b96a11f372b6ea658592aa95')
+                         '4091c7d010ca99d0f2dd416d4b70b758ae432187')
         sys.stdout.close()
 
     def testRevertNew(self):
@@ -309,7 +310,7 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
         scm.diff(options, self.args, file_list)
         self.assertEqual(file_list, [])
         self.assertEqual(scm.revinfo(options, self.args, None),
-                         'a7142dc9f0009350b96a11f372b6ea658592aa95')
+                         '4091c7d010ca99d0f2dd416d4b70b758ae432187')
         sys.stdout.close()
 
     def testStatusRef(self):
@@ -389,14 +390,16 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
         if not self.enabled:
             return
         options = self.Options()
-        expected_file_list = [join(self.base_path, x) for x in ['a', 'b']]
+        expected_file_list = [
+            join(self.base_path, x) for x in ['a', 'b', 'submodule']
+        ]
         scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath)
         file_list = []
 
         scm.update(options, (), file_list)
         self.assertEqual(file_list, expected_file_list)
         self.assertEqual(scm.revinfo(options, (), None),
-                         'a7142dc9f0009350b96a11f372b6ea658592aa95')
+                         '4091c7d010ca99d0f2dd416d4b70b758ae432187')
         self.assertEqual(
             scm._Capture(['config', '--get', 'diff.ignoreSubmodules']), 'dirty')
         self.assertEqual(
@@ -418,12 +421,13 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
         rev = scm.revinfo(options, (), None)
         file_list = []
         scm.update(options, (), file_list)
-        self.assertEqual(file_list,
-                         [join(self.base_path, x) for x in ['a', 'b', 'c']])
+        self.assertEqual(
+            file_list,
+            [join(self.base_path, x) for x in ['a', 'b', 'c', 'submodule']])
         # The actual commit that is created is unstable, so we verify its tree
         # and parents instead.
         self.assertEqual(scm._Capture(['rev-parse', 'HEAD:']),
-                         'd2e35c10ac24d6c621e14a1fcadceb533155627d')
+                         '3a3ba72731fa208d37b06598a129ba93970325df')
         parent = 'HEAD^' if sys.platform != 'win32' else 'HEAD^^'
         self.assertEqual(scm._Capture(['rev-parse', parent + '1']), rev)
         self.assertEqual(scm._Capture(['rev-parse', parent + '2']),
@@ -445,12 +449,13 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
             '(y)es / (q)uit / (s)kip : ', 'y')
         file_list = []
         scm.update(options, (), file_list)
-        self.assertEqual(file_list,
-                         [join(self.base_path, x) for x in ['a', 'b', 'c']])
+        self.assertEqual(
+            file_list,
+            [join(self.base_path, x) for x in ['a', 'b', 'c', 'submodule']])
         # The actual commit that is created is unstable, so we verify its tree
         # and parent instead.
         self.assertEqual(scm._Capture(['rev-parse', 'HEAD:']),
-                         'd2e35c10ac24d6c621e14a1fcadceb533155627d')
+                         '3a3ba72731fa208d37b06598a129ba93970325df')
         parent = 'HEAD^' if sys.platform != 'win32' else 'HEAD^^'
         self.assertEqual(scm._Capture(['rev-parse', parent + '1']),
                          scm._Capture(['rev-parse', 'origin/main']))
@@ -801,14 +806,15 @@ class UnmanagedGitWrapperTestCase(BaseGitWrapperTestCase):
         self.relpath = '.'
         self.base_path = join(self.root_dir, self.relpath)
         url_with_commit_ref = origin_root_dir +\
-                              '@a7142dc9f0009350b96a11f372b6ea658592aa95'
+                              '@4091c7d010ca99d0f2dd416d4b70b758ae432187'
 
         scm = gclient_scm.GitWrapper(url_with_commit_ref, self.root_dir,
                                      self.relpath)
 
         expected_file_list = [
             join(self.base_path, "a"),
-            join(self.base_path, "b")
+            join(self.base_path, "b"),
+            join(self.base_path, "submodule"),
         ]
         file_list = []
         options.revision = 'unmanaged'
@@ -816,11 +822,11 @@ class UnmanagedGitWrapperTestCase(BaseGitWrapperTestCase):
 
         self.assertEqual(file_list, expected_file_list)
         self.assertEqual(scm.revinfo(options, (), None),
-                         'a7142dc9f0009350b96a11f372b6ea658592aa95')
+                         '4091c7d010ca99d0f2dd416d4b70b758ae432187')
         # indicates detached HEAD
         self.assertEqual(self.getCurrentBranch(), None)
         self.checkInStdout(
-            'Checked out a7142dc9f0009350b96a11f372b6ea658592aa95 to a detached HEAD'
+            'Checked out 4091c7d010ca99d0f2dd416d4b70b758ae432187 to a detached HEAD'
         )
 
     def testUpdateCloneOnBranch(self):
@@ -1602,6 +1608,57 @@ class CheckDiffTest(fake_repos.FakeReposTestBase):
         self.assertFalse(scm.check_diff(self.githash('repo_1', 2)))
 
 
+class Submodules(BaseGitWrapperTestCase):
+    submodule_hash = '1111111111111111111111111111111111111111'
+
+    def testGetSubmoduleClean(self):
+        scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath)
+        options = self.Options()
+        scm.update(options, None, [])
+        self.assertEqual(scm.GetSubmoduleStateFromIndex(),
+                         {'submodule': self.submodule_hash})
+        self.assertEqual(scm.GetSubmoduleDiff(), {})
+
+    def testGetSubmoduleModified(self):
+        scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath)
+        options = self.Options()
+        scm.update(options, None, [])
+
+        # Create submodule diff
+        submodule_dir = os.path.join(self.root_dir, 'submodule')
+        subprocess2.check_output(['git', '-C', submodule_dir, 'init'])
+        subprocess2.check_output([
+            'git', '-C', submodule_dir, 'commit', '-m', 'foo', '--allow-empty'
+        ])
+        new_rev = subprocess2.check_output(
+            ['git', '-C', submodule_dir, 'rev-parse',
+             'HEAD']).decode('utf-8').strip()
+
+        # And file diff
+        with open(os.path.join(self.root_dir, 'a'), 'w') as f:
+            f.write('foo')
+
+        self.assertEqual(scm.GetSubmoduleStateFromIndex(),
+                         {'submodule': self.submodule_hash})
+
+        self.assertEqual(scm.GetSubmoduleDiff(),
+                         {'submodule': (self.submodule_hash, new_rev)})
+
+    def testGetSubmoduleDeleted(self):
+        scm = gclient_scm.GitWrapper(self.url, self.root_dir, self.relpath)
+        options = self.Options()
+        scm.update(options, None, [])
+        subprocess2.check_output(
+            ['git', '-C', self.root_dir, 'rm', 'submodule'])
+
+        # When git removes submodule, it's autmatically staged and content is
+        # unavailable. Therefore, the index shouldn't have any entries and diff
+        # should be empty.
+        self.assertEqual(scm.GetSubmoduleStateFromIndex(), {})
+
+        self.assertEqual(scm.GetSubmoduleDiff(), {})
+
+
 if 'unittest.util' in __import__('sys').modules:
     # Show full diff in self.assertEqual.
     __import__('sys').modules['unittest.util']._MAX_LENGTH = 999999999