@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+# Copyright (c) 2024 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""Uploads files to Google Storage and outputs a DEPS blob."""
+
+import hashlib
+import json
+import optparse
+import os
+import re
+import sys
+import tarfile
+import tempfile
+
+from download_from_google_storage import Gsutil
+from download_from_google_storage import GSUTIL_DEFAULT_PATH
+
+USAGE_STRING = """%prog [options] target [target2 ...].
+Target(s) are the files or directories to be uploaded to Google Storage.
+If a single target is a directory, it will be compressed and uploaded as a
+tar.gz file.
+If target is "-", then a list of directories will be taken from standard input.
+The list of directories will be compressed together and uploaded as one tar.gz
+file.
+
+Example usage
+-------------
+./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
+--object-name my_object_name hello_world.txt
+
+./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
+--object-name my_object_name my_dir1
+
+./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
+--object-name my_object_name my_dir1 my_dir2
+
+Scan the current directory and upload all files larger than 1MB:
+find . -name .svn -prune -o -size +1000k -type f -print0 |
+./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
+--object-name my_object_name -
+"""
+
+
+def get_targets(args: list[str], parser: optparse.OptionParser,
+                use_null_terminator: bool) -> list[str]:
+    """Get target(s) to upload to GCS"""
+    if not args:
+        parser.error('Missing target.')
+
+    if len(args) == 1 and args[0] == '-':
+        # Take stdin as a newline or null separated list of files.
+        if use_null_terminator:
+            return sys.stdin.read().split('\0')
+
+        return sys.stdin.read().splitlines()
+
+    return args
+
+
+def create_archive(dirs: list[str]) -> str:
+    """Given a list of directories, compress them all into one tar file"""
+    # tarfile name cannot have a forward slash or else an error will be
+    # thrown
+    _, filename = tempfile.mkstemp(suffix='.tar.gz')
+    with tarfile.open(filename, 'w:gz') as tar:
+        for d in dirs:
+            tar.add(d)
+    return filename
+
+
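+# Example (illustrative): validate_archive_dirs(['my_dir1', 'my_dir2']) only
+# returns True when both entries are immediate subdirectories of the current
+# working directory; entries such as '..', regular files, or symlinks that
+# resolve outside the listed directories are rejected.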
+def validate_archive_dirs(dirs: list[str]) -> bool:
+    """Validate the list of directories"""
+    for d in dirs:
+        # We don't allow .. in paths in our archives.
+        if d == '..':
+            return False
+        # We only allow dirs.
+        if not os.path.isdir(d):
+            return False
+        # Symlinks must point to a target inside the dirs.
+        if os.path.islink(d) and not any(
+                os.realpath(d).startswith(os.realpath(dir_prefix))
+                for dir_prefix in dirs):
+            return False
+        # We require that the subdirectories we are archiving are all just
+        # below cwd.
+        if d not in next(os.walk('.'))[1]:
+            return False
+
+    return True
+
+
+def get_sha256sum(filename: str) -> str:
+    """Get the sha256sum of the file"""
+    sha = hashlib.sha256()
+    with open(filename, 'rb') as f:
+        while True:
+            # Read in 1mb chunks, so it doesn't all have to be loaded into
+            # memory.
+            chunk = f.read(1024 * 1024)
+            if not chunk:
+                break
+            sha.update(chunk)
+    return sha.hexdigest()
+
+
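+# Rough sketch of the effective command (assuming the Gsutil wrapper simply
+# forwards these arguments to gsutil): for the bucket and object name from the
+# usage examples above, the upload below is approximately
+#   gsutil -h "Cache-Control:public, max-age=31536000" \
+#       cp [-z <ext>] <file> gs://gsutil-upload-playground/my_object_name
+# where -z is only added when --gzip is passed.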
+def upload_to_google_storage(file: str, base_url: str, object_name: str,
+                             gsutil: Gsutil, force: bool, gzip: str,
+                             dry_run: bool):
+    """Upload file to GCS"""
+    file_url = '%s/%s' % (base_url, object_name)
+    if gsutil.check_call('ls', file_url)[0] == 0 and not force:
+        # File exists, check MD5 hash.
+        _, out, _ = gsutil.check_call_with_retries('ls', '-L', file_url)
+        etag_match = re.search(r'ETag:\s+\S+', out)
+        if etag_match:
+            raise Exception('File with url %s already exists' % file_url)
+    if dry_run:
+        return
+    print("Uploading %s as %s" % (file, file_url))
+    gsutil_args = ['-h', 'Cache-Control:public, max-age=31536000', 'cp']
+    if gzip:
+        gsutil_args.extend(['-z', gzip])
+    gsutil_args.extend([file, file_url])
+    code, _, err = gsutil.check_call_with_retries(*gsutil_args)
+    if code != 0:
+        raise Exception(
+            code, 'Encountered error on uploading %s to %s\n%s' %
+            (file, file_url, err))
+
+
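+# Example of the printed DEPS hint (illustrative values; the '<path>' key is
+# emitted literally and presumably stands for the dependency's checkout path):
+# {
+#   "<path>": {
+#     "dep_type": "gcs",
+#     "bucket": "gsutil-upload-playground",
+#     "object_name": "my_object_name",
+#     "sha256sum": "<sha256 of the uploaded file>",
+#     "size_bytes": <size of the uploaded file in bytes>
+#   }
+# }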
+def construct_deps_blob(bucket: str, object_name: str, file: str) -> dict:
+    """Output a blob hint that would need to be added to a DEPS file"""
+    sha256sum = get_sha256sum(file)
+    size_bytes = os.path.getsize(file)
+    return {
+        '<path>': {
+            'dep_type': 'gcs',
+            'bucket': bucket,
+            'object_name': object_name,
+            'sha256sum': sha256sum,
+            'size_bytes': size_bytes,
+        }
+    }
+
+
+def main():
+    parser = optparse.OptionParser(USAGE_STRING)
+    parser.add_option('-b',
+                      '--bucket',
+                      help='Google Storage bucket to upload to.')
+    parser.add_option('-o',
+                      '--object-name',
+                      help='Optional object name of uploaded tar file. '
+                      'If empty, the sha256sum will be the object name.')
+    parser.add_option('-d',
+                      '--dry-run',
+                      action='store_true',
+                      help='Check if file already exists on GS without '
+                      'uploading it and output DEPS blob.')
+    parser.add_option('-c',
+                      '--config',
+                      action='store_true',
+                      help='Alias for "gsutil config". Run this if you want '
+                      'to initialize your saved Google Storage '
+                      'credentials. This will create a read-only '
+                      'credentials file in ~/.boto.depot_tools.')
+    parser.add_option('-e', '--boto', help='Specify a custom boto file.')
+    parser.add_option('-f',
+                      '--force',
+                      action='store_true',
+                      help='Force upload even if remote file exists.')
+    parser.add_option('-g',
+                      '--gsutil_path',
+                      default=GSUTIL_DEFAULT_PATH,
+                      help='Path to the gsutil script.')
+    parser.add_option('-0',
+                      '--use_null_terminator',
+                      action='store_true',
+                      help='Use \\0 instead of \\n when parsing '
+                      'the file list from stdin. This is useful if the input '
+                      'is coming from "find ... -print0".')
+    parser.add_option('-z',
+                      '--gzip',
+                      metavar='ext',
+                      help='For files which end in <ext>, gzip them before '
+                      'upload. '
+                      'ext is a comma-separated list of extensions.')
+    (options, args) = parser.parse_args()
+
+    # Enumerate our inputs.
+    input_filenames = get_targets(args, parser, options.use_null_terminator)
+
+    if len(input_filenames) > 1 or (len(input_filenames) == 1
+                                    and os.path.isdir(input_filenames[0])):
+        if not validate_archive_dirs(input_filenames):
+            parser.error(
+                'Only directories just below cwd are valid entries. '
+                'Entries cannot contain .. and entries cannot be symlinks. '
+                'Entries were: %s' % input_filenames)
+            return 1
+        file = create_archive(input_filenames)
+    else:
+        file = input_filenames[0]
+
+    object_name = options.object_name
+    if not object_name:
+        object_name = get_sha256sum(file)
+
+    # Make sure we can find a working instance of gsutil.
+    if os.path.exists(GSUTIL_DEFAULT_PATH):
+        gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto)
+    else:
+        gsutil = None
+        for path in os.environ["PATH"].split(os.pathsep):
+            if os.path.exists(path) and 'gsutil' in os.listdir(path):
+                gsutil = Gsutil(os.path.join(path, 'gsutil'),
+                                boto_path=options.boto)
+    if not gsutil:
+        parser.error('gsutil not found in %s, bad depot_tools checkout?' %
+                     GSUTIL_DEFAULT_PATH)
+
+    # Passing in -c/--config will run our copy of GSUtil, then quit.
+    if options.config:
+        print('===Note from depot_tools===')
+        print('If you do not have a project ID, enter "0" when asked for one.')
+        print('===End note from depot_tools===')
+        print()
+        gsutil.check_call('version')
+        return gsutil.call('config')
+
+    base_url = 'gs://%s' % options.bucket
+
+    upload_to_google_storage(file, base_url, object_name, gsutil,
+                             options.force, options.gzip, options.dry_run)
+    print(
+        json.dumps(construct_deps_blob(options.bucket, object_name, file),
+                   indent=2))
+
+
+if __name__ == '__main__':
+    try:
+        sys.exit(main())
+    except KeyboardInterrupt:
+        sys.stderr.write('interrupted\n')
+        sys.exit(1)