Răsfoiți Sursa

Add GCS root and .gcs_entries file to keep track of installed gcs deps

Multiple objects can now share a directory, so when an object is
removed from a GCS dep, the contents extracted from that object should
be removed from the directory as well. Since it is not known exactly
which files came from that object, the whole directory is cleared.
If an entire GCS dep is added or removed, the corresponding directory
path is cleared as well.

.gcs_entries holds a record of which GCS deps and objects
have been downloaded, per checkout. Example:
```
{
  "src": {
    "src/third_party/llvm-build/Release+Asserts": [
      "Linux_x64/llvmobjdump-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz",
      "Linux_x64/clang-llvmorg-19-init-2941-ga0b3dbaf-22.tar.xz"
    ],
    "src/third_party/node/linux": [
      "46795170ff5df9831955f163f6966abde581c8af"
    ]
  }
}
```

Bug: b/324418194
Change-Id: Icac113572523b61c83450880615418bf7df8bba7
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5407888
Reviewed-by: Joanna Wang <jojwang@chromium.org>
Commit-Queue: Stephanie Kim <kimstephanie@google.com>
Stephanie Kim 1 an în urmă
părinte
comite
589ccd8d54
2 a modificat fișierele cu 132 adăugiri și 2 ștergeri
  1. 28 2
      gclient.py
  2. 104 0
      gclient_scm.py

+ 28 - 2
gclient.py

@@ -765,6 +765,7 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                 if len(object_name_set) != len(dep_value['objects']):
                 if len(object_name_set) != len(dep_value['objects']):
                     raise Exception('Duplicate object names detected in {} GCS '
                     raise Exception('Duplicate object names detected in {} GCS '
                                     'dependency.'.format(name))
                                     'dependency.'.format(name))
+                gcs_root = self.GetGcsRoot()
                 for obj in dep_value['objects']:
                 for obj in dep_value['objects']:
                     deps_to_add.append(
                     deps_to_add.append(
                         GcsDependency(parent=self,
                         GcsDependency(parent=self,
@@ -774,6 +775,7 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                                       sha256sum=obj['sha256sum'],
                                       sha256sum=obj['sha256sum'],
                                       output_file=obj.get('output_file'),
                                       output_file=obj.get('output_file'),
                                       size_bytes=obj['size_bytes'],
                                       size_bytes=obj['size_bytes'],
+                                      gcs_root=gcs_root,
                                       custom_vars=self.custom_vars,
                                       custom_vars=self.custom_vars,
                                       should_process=should_process,
                                       should_process=should_process,
                                       relative=use_relative_paths,
                                       relative=use_relative_paths,
@@ -1223,6 +1225,12 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
 
 
         if self.should_recurse:
         if self.should_recurse:
             self.ParseDepsFile()
             self.ParseDepsFile()
+            gcs_root = self.GetGcsRoot()
+            if gcs_root:
+                if command == 'revert':
+                    gcs_root.clobber()
+                elif command == 'update':
+                    gcs_root.clobber_deps_with_updated_objects(self.name)
 
 
         self._run_is_done(file_list or [])
         self._run_is_done(file_list or [])
 
 
@@ -1236,6 +1244,9 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
             for s in self.dependencies:
             for s in self.dependencies:
                 if s.should_process:
                 if s.should_process:
                     work_queue.enqueue(s)
                     work_queue.enqueue(s)
+            gcs_root = self.GetGcsRoot()
+            if gcs_root and command == 'update':
+                gcs_root.resolve_objects(self.name)
 
 
         if command == 'recurse':
         if command == 'recurse':
             # Skip file only checkout.
             # Skip file only checkout.
@@ -1389,6 +1400,13 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
             return None
             return None
         return self.root.GetCipdRoot()
         return self.root.GetCipdRoot()
 
 
+    def GetGcsRoot(self):
+        if self.root is self:
+            # Let's not infinitely recurse. If this is root and isn't an
+            # instance of GClient, do nothing.
+            return None
+        return self.root.GetGcsRoot()
+
     def subtree(self, include_all):
     def subtree(self, include_all):
         """Breadth first recursion excluding root node."""
         """Breadth first recursion excluding root node."""
         dependencies = self.dependencies
         dependencies = self.dependencies
@@ -1709,6 +1727,7 @@ solutions = %(solution_list)s
         self._enforced_cpu = (detect_host_arch.HostArch(), )
         self._enforced_cpu = (detect_host_arch.HostArch(), )
         self._root_dir = root_dir
         self._root_dir = root_dir
         self._cipd_root = None
         self._cipd_root = None
+        self._gcs_root = None
         self.config_content = None
         self.config_content = None
 
 
     def _CheckConfig(self):
     def _CheckConfig(self):
@@ -2494,6 +2513,11 @@ it or fix the checkout.
                 log_level='info' if self._options.verbose else None)
                 log_level='info' if self._options.verbose else None)
         return self._cipd_root
         return self._cipd_root
 
 
+    def GetGcsRoot(self):
+        if not self._gcs_root:
+            self._gcs_root = gclient_scm.GcsRoot(self.root_dir)
+        return self._gcs_root
+
     @property
     @property
     def root_dir(self):
     def root_dir(self):
         """Root directory of gclient checkout."""
         """Root directory of gclient checkout."""
@@ -2517,14 +2541,16 @@ class GcsDependency(Dependency):
     """A Dependency object that represents a single GCS bucket and object"""
     """A Dependency object that represents a single GCS bucket and object"""
 
 
     def __init__(self, parent, name, bucket, object_name, sha256sum,
     def __init__(self, parent, name, bucket, object_name, sha256sum,
-                 output_file, size_bytes, custom_vars, should_process, relative,
-                 condition):
+                 output_file, size_bytes, gcs_root, custom_vars, should_process,
+                 relative, condition):
         self.bucket = bucket
         self.bucket = bucket
         self.object_name = object_name
         self.object_name = object_name
         self.sha256sum = sha256sum
         self.sha256sum = sha256sum
         self.output_file = output_file
         self.output_file = output_file
         self.size_bytes = size_bytes
         self.size_bytes = size_bytes
         url = f'gs://{self.bucket}/{self.object_name}'
         url = f'gs://{self.bucket}/{self.object_name}'
+        self._gcs_root = gcs_root
+        self._gcs_root.add_object(parent.name, name, object_name)
         super(GcsDependency, self).__init__(parent=parent,
         super(GcsDependency, self).__init__(parent=parent,
                                             name=f'{name}:{object_name}',
                                             name=f'{name}:{object_name}',
                                             url=url,
                                             url=url,

+ 104 - 0
gclient_scm.py

@@ -1913,6 +1913,110 @@ class CipdWrapper(SCMWrapper):
     """
     """
 
 
 
 
class GcsRoot(object):
    """Root to keep track of all GCS objects, per checkout.

    Two mappings of the same shape are maintained, both of the form
    ``checkout_name -> {GCS dep path -> [object_name]}``:

    * ``_parsed_objects``: objects registered through ``add_object`` that
      have not yet been written to the ``.gcs_entries`` file.
    * ``_gcs_entries``: the contents of the ``.gcs_entries`` file, i.e. the
      objects recorded by earlier ``resolve_objects`` calls.
    """

    def __init__(self, root_dir):
        """Set up tracking for the checkout rooted at ``root_dir``.

        Args:
            root_dir: Directory that holds the ``.gcs_entries`` file and
                against which dep paths are resolved.
        """
        # Guards every read-modify-write of the two mappings below.
        self._mutator_lock = threading.Lock()
        self._root_dir = root_dir
        # Populated when the DEPS file is parsed.
        # The objects here have not yet been downloaded and written into
        # the .gcs_entries file.
        self._parsed_objects = {}
        # .gcs_entries keeps track of which GCS deps have already been
        # installed. Maps checkout_name -> {GCS dep path -> [object_name]}.
        # This file is in the same directory as .gclient.
        self._gcs_entries_file = os.path.join(self._root_dir, '.gcs_entries')
        # Contents of the .gcs_entries file.
        self._gcs_entries = self.read_gcs_entries()

    @property
    def root_dir(self):
        """Directory that dep paths and ``.gcs_entries`` are relative to."""
        return self._root_dir

    def add_object(self, checkout_name, dep_path, object_name):
        """Records the object in the _parsed_objects variable.

        This does not actually download the object.

        Args:
            checkout_name: Name of the checkout that declares the dep.
            dep_path: Path of the GCS dep the object belongs to.
            object_name: Name of the GCS object.
        """
        with self._mutator_lock:
            checkout_deps = self._parsed_objects.setdefault(checkout_name, {})
            checkout_deps.setdefault(dep_path, []).append(object_name)

    def read_gcs_entries(self):
        """Reads the .gcs_entries file and returns its decoded content.

        Returns:
            The decoded JSON value, or an empty dict when the file is
            missing or contains only whitespace.
        """
        try:
            with open(self._gcs_entries_file, 'r', encoding='utf-8') as f:
                content = f.read().rstrip()
        except FileNotFoundError:
            return {}
        return json.loads(content) if content else {}

    def resolve_objects(self, checkout_name):
        """Updates .gcs_entries with objects in _parsed_objects.

        This should only be called after the objects have been downloaded
        and extracted. When nothing was parsed for ``checkout_name`` the
        method returns without touching the file or the recorded entries.
        """
        with self._mutator_lock:
            object_dict = self._parsed_objects.get(checkout_name)
            if not object_dict:
                return
            self._gcs_entries[checkout_name] = object_dict
            with open(self._gcs_entries_file, 'w', encoding='utf-8') as f:
                f.write(json.dumps(self._gcs_entries, indent=2))
            # Everything parsed for this checkout is now recorded.
            self._parsed_objects[checkout_name] = {}

    @staticmethod
    def _paths_to_clobber(parsed_object_dict, resolved_object_dict):
        """Returns the sorted dep paths whose objects changed.

        A path is reported when it appears in only one of the two mappings
        (the GCS dep was added or removed entirely) or when the set of
        object names differs between them. Comparing the names rather than
        only the number of objects also catches an object being replaced
        by a different one.
        """
        changed = set()
        for path in set(parsed_object_dict) | set(resolved_object_dict):
            if path not in parsed_object_dict:
                changed.add(path)
            elif path not in resolved_object_dict:
                changed.add(path)
            elif (set(parsed_object_dict[path]) != set(
                    resolved_object_dict[path])):
                changed.add(path)
        return sorted(changed)

    def _clobber_dep_path(self, dep_path):
        """Removes the directory of ``dep_path`` if it exists on disk."""
        full_path = os.path.join(self.root_dir, dep_path)
        # Skip paths that were never extracted or were already deleted so
        # the removal helper is only ever handed an existing path.
        if os.path.exists(full_path):
            gclient_utils.rmtree(full_path)

    def clobber_deps_with_updated_objects(self, checkout_name):
        """Clobber the path if an object or GCS dependency is removed/added.

        This must be called before the GCS dependencies are
        downloaded and extracted. The exact files extracted from a single
        object are not tracked, so the whole dep directory is removed
        whenever its set of objects changed.
        """
        with self._mutator_lock:
            parsed_object_dict = self._parsed_objects.get(checkout_name, {})
            resolved_object_dict = self._gcs_entries.get(checkout_name, {})
            for path in self._paths_to_clobber(parsed_object_dict,
                                               resolved_object_dict):
                self._clobber_dep_path(path)

    def clobber(self):
        """Remove all dep path directories and clear .gcs_entries."""
        # Hold the lock for the whole operation so the recorded entries
        # cannot change between being read and being reset.
        with self._mutator_lock:
            for objects_dict in self._gcs_entries.values():
                for dep_path in objects_dict:
                    self._clobber_dep_path(dep_path)
            if os.path.exists(self._gcs_entries_file):
                os.remove(self._gcs_entries_file)
            self._gcs_entries = {}
+
 class GcsWrapper(SCMWrapper):
 class GcsWrapper(SCMWrapper):
     """Wrapper for GCS.
     """Wrapper for GCS.