[gclient] Delete GCS output_dir on download

The GCS output_dir needs to be cleaned up before new content is
extracted. However, it can't be cleaned up by an individual GCS
fetcher, since multiple objects can live in the same output_dir.

This change adds top-level support for deleting a stale GCS output_dir.
If any of the objects need to be downloaded, the output_dir is deleted
entirely and all objects are redownloaded.
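
As a minimal sketch of the new flow (names such as gcs_deps,
IsDownloadNeeded and output_dir follow the diff below; the surrounding
Dependency machinery is elided, so this is illustrative rather than the
exact implementation):

    import os
    import shutil

    def clean_stale_gcs_output_dir(gcs_deps):
        # Sketch: remove the shared output_dir once if any object is stale.
        if not gcs_deps:
            return
        # All objects of one GCS dep extract into the same output_dir,
        # so checking the first entry is enough.
        output_dir = gcs_deps[0].output_dir
        if (any(gcs.IsDownloadNeeded() for gcs in gcs_deps)
                and os.path.exists(output_dir)):
            # Old content can't be attributed to individual objects, so
            # the whole directory is removed and everything is
            # redownloaded.
            shutil.rmtree(output_dir)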

R=jojwang@google.com

Bug: 342522902, 338612245
Change-Id: Icbe4f1238cac54d7390bbb9b6fc5f17c538cca62
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5570466
Reviewed-by: Joanna Wang <jojwang@chromium.org>
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
Josip Sokcevic, 1 year ago
Parent commit: 7d95eb2eb0
1 changed file with 91 additions and 53 deletions:

gclient.py (+91 −53)

@@ -775,6 +775,9 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                                        relative=use_relative_paths,
                                        condition=condition))
             elif dep_type == 'gcs':
+                if len(dep_value['objects']) == 0:
+                    continue
+
                 # Validate that all objects are unique
                 object_name_set = {
                     o['object_name']
@@ -784,13 +787,15 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                     raise Exception('Duplicate object names detected in {} GCS '
                                     'dependency.'.format(name))
                 gcs_root = self.GetGcsRoot()
+
+                gcs_deps = []
                 for obj in dep_value['objects']:
                     merged_condition = gclient_utils.merge_conditions(
                         condition, obj.get('condition'))
                     should_process_object = should_process and _should_process(
                         merged_condition)
 
-                    deps_to_add.append(
+                    gcs_deps.append(
                         GcsDependency(parent=self,
                                       name=name,
                                       bucket=dep_value['bucket'],
@@ -803,6 +808,18 @@ class Dependency(gclient_utils.WorkItem, DependencySettings):
                                       should_process=should_process_object,
                                       relative=use_relative_paths,
                                       condition=merged_condition))
+                deps_to_add.extend(gcs_deps)
+
+                # Check if at least one object needs to be downloaded.
+                needs_download = any(gcs.IsDownloadNeeded() for gcs in gcs_deps)
+                if needs_download and os.path.exists(gcs_deps[0].output_dir):
+                    # Since we don't know what old content to remove, we remove
+                    # the entire output_dir. All gcs_deps are expected to have
+                    # the same output_dir, so we get the first one, which must
+                    # exist.
+                    logging.warning(
+                        'GCS dependency %s new version, removing old.', name)
+                    shutil.rmtree(gcs_deps[0].output_dir)
             else:
                 url = dep_value.get('url')
                 deps_to_add.append(
@@ -2622,6 +2639,40 @@ class GcsDependency(Dependency):
                                             relative=relative,
                                             condition=condition)
 
+    @property
+    def output_dir(self):
+        return os.path.join(self.root.root_dir, self.name.split(':')[0])
+
+    @property
+    def gcs_file_name(self):
+        return self.object_name.replace('/', '_')
+
+    @property
+    def artifact_output_file(self):
+        return os.path.join(self.output_dir, self.output_file
+                            or f'.{self.gcs_file_name}')
+
+    @property
+    def hash_file(self):
+        return os.path.join(self.output_dir, f'.{self.file_prefix}_hash')
+
+    @property
+    def migration_toggle_file(self):
+        return os.path.join(
+            self.output_dir,
+            download_from_google_storage.construct_migration_file_name(
+                self.object_name))
+
+    @property
+    def gcs_file_name(self):
+        # Replace forward slashes
+        return self.object_name.replace('/', '_')
+
+    @property
+    def file_prefix(self):
+        # Drop any extensions
+        return self.gcs_file_name.replace('.', '_')
+
     #override
     def verify_validity(self):
         """GCS dependencies allow duplicate name for objects in same directory."""
@@ -2645,25 +2696,24 @@ class GcsDependency(Dependency):
             f.write(content)
             f.write('\n')
 
-    def IsDownloadNeeded(self, output_dir, output_file, hash_file,
-                         migration_toggle_file):
+    def IsDownloadNeeded(self):
         """Check if download and extract is needed."""
-        if not os.path.exists(output_file):
+        if not self.should_process:
+            return False
+        if not os.path.exists(self.artifact_output_file):
             return True
 
         existing_hash = None
-        if os.path.exists(hash_file):
+        if os.path.exists(self.hash_file):
             try:
-                with open(hash_file, 'r') as f:
+                with open(self.hash_file, 'r') as f:
                     existing_hash = f.read().rstrip()
             except IOError:
                 return True
         else:
             return True
 
-        # (b/328065301): Remove is_first_class_gcs_file logic when all GCS
-        # hooks are migrated to first class deps
-        is_first_class_gcs = os.path.exists(migration_toggle_file)
+        is_first_class_gcs = os.path.exists(self.migration_toggle_file)
         if not is_first_class_gcs:
             return True
 
@@ -2702,63 +2752,50 @@ class GcsDependency(Dependency):
 
     def DownloadGoogleStorage(self):
         """Calls GCS."""
-        # Replace forward slashes
-        gcs_file_name = self.object_name.replace('/', '_')
-        root_dir = self.root.root_dir
 
-        # Directory of the extracted tarfile contents
-        output_dir = os.path.join(root_dir, self.name.split(':')[0])
-        output_file = os.path.join(output_dir, self.output_file
-                                   or f'.{gcs_file_name}')
-
-        # Drop any extensions
-        file_prefix = gcs_file_name.replace('.', '_')
-        hash_file = os.path.join(output_dir, f'.{file_prefix}_hash')
-        migration_toggle_file = os.path.join(
-            output_dir,
-            download_from_google_storage.construct_migration_file_name(
-                self.object_name))
-        if not self.IsDownloadNeeded(output_dir, output_file, hash_file,
-                                     migration_toggle_file):
+        if not self.IsDownloadNeeded():
             return
 
-        # Remove hashfile
-        if os.path.exists(hash_file):
-            os.remove(hash_file)
-
-        # Remove tarfile
-        if os.path.exists(output_file):
-            os.remove(output_file)
+        files_to_remove = [
+            self.hash_file,
+            self.artifact_output_file,
+            self.migration_toggle_file,
+        ]
+        for f in files_to_remove:
+            if os.path.exists(f):
+                os.remove(f)
 
-        # Another GCS dep could be using the same output_dir, so don't remove
-        # it
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
+        # Ensure that the directory exists. Another process/thread may create
+        # it, so exist_ok is used.
+        os.makedirs(self.output_dir, exist_ok=True)
 
         gsutil = download_from_google_storage.Gsutil(
             download_from_google_storage.GSUTIL_DEFAULT_PATH)
         if os.getenv('GCLIENT_TEST') == '1':
-            if 'no-extract' in output_file:
-                with open(output_file, 'w+') as f:
+            if 'no-extract' in self.artifact_output_file:
+                with open(self.artifact_output_file, 'w+') as f:
                     f.write('non-extractable file')
             else:
                 # Create fake tar file and extracted tar contents
                 tmpdir = tempfile.mkdtemp()
-                copy_dir = os.path.join(tmpdir, gcs_file_name, 'extracted_dir')
+                copy_dir = os.path.join(tmpdir, self.gcs_file_name,
+                                        'extracted_dir')
                 if os.path.exists(copy_dir):
                     shutil.rmtree(copy_dir)
                 os.makedirs(copy_dir)
                 with open(os.path.join(copy_dir, 'extracted_file'), 'w+') as f:
                     f.write('extracted text')
-                with tarfile.open(output_file, "w:gz") as tar:
+                with tarfile.open(self.artifact_output_file, "w:gz") as tar:
                     tar.add(copy_dir, arcname=os.path.basename(copy_dir))
         else:
-            code, _, err = gsutil.check_call('cp', self.url, output_file)
+            code, _, err = gsutil.check_call('cp', self.url,
+                                             self.artifact_output_file)
             if code and err:
                 raise Exception(f'{code}: {err}')
             # Check that something actually downloaded into the path
-            if not os.path.exists(output_file):
-                raise Exception(f'Nothing was downloaded into {output_file}')
+            if not os.path.exists(self.artifact_output_file):
+                raise Exception(
+                    f'Nothing was downloaded into {self.artifact_output_file}')
 
         calculated_sha256sum = ''
         calculated_size_bytes = None
@@ -2767,8 +2804,9 @@ class GcsDependency(Dependency):
             calculated_size_bytes = 10000
         else:
             calculated_sha256sum = (
-                upload_to_google_storage_first_class.get_sha256sum(output_file))
-            calculated_size_bytes = os.path.getsize(output_file)
+                upload_to_google_storage_first_class.get_sha256sum(
+                    self.artifact_output_file))
+            calculated_size_bytes = os.path.getsize(self.artifact_output_file)
 
         if calculated_sha256sum != self.sha256sum:
             raise Exception('sha256sum does not match calculated hash. '
@@ -2784,8 +2822,8 @@ class GcsDependency(Dependency):
                                 calculated=calculated_size_bytes,
                             ))
 
-        if tarfile.is_tarfile(output_file):
-            with tarfile.open(output_file, 'r:*') as tar:
+        if tarfile.is_tarfile(self.artifact_output_file):
+            with tarfile.open(self.artifact_output_file, 'r:*') as tar:
                 formatted_names = []
                 for name in tar.getnames():
                     if name.startswith('./') and len(name) > 2:
@@ -2800,19 +2838,19 @@ class GcsDependency(Dependency):
                     raise Exception('tarfile contains invalid entries')
 
                 tar_content_file = os.path.join(
-                    output_dir, f'.{file_prefix}_content_names')
+                    self.output_dir, f'.{self.file_prefix}_content_names')
                 self.WriteToFile(json.dumps(tar.getnames()), tar_content_file)
 
-                tar.extractall(path=output_dir)
+                tar.extractall(path=self.output_dir)
 
         if os.getenv('GCLIENT_TEST') != '1':
             code, err = download_from_google_storage.set_executable_bit(
-                output_file, self.url, gsutil)
+                self.artifact_output_file, self.url, gsutil)
             if code != 0:
                 raise Exception(f'{code}: {err}')
 
-        self.WriteToFile(calculated_sha256sum, hash_file)
-        self.WriteToFile(str(1), migration_toggle_file)
+        self.WriteToFile(calculated_sha256sum, self.hash_file)
+        self.WriteToFile(str(1), self.migration_toggle_file)
 
     #override
     def GetScmName(self):
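
For illustration, how the new path properties derive file names from
object_name (the object name 'Linux/clang.tar.gz' below is a made-up
example, not taken from the change):

    object_name = 'Linux/clang.tar.gz'
    gcs_file_name = object_name.replace('/', '_')   # 'Linux_clang.tar.gz'
    file_prefix = gcs_file_name.replace('.', '_')   # 'Linux_clang_tar_gz'
    hash_file_name = f'.{file_prefix}_hash'         # '.Linux_clang_tar_gz_hash'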