123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283 |
- #!/usr/bin/env python3
- # Copyright (c) 2024 The Chromium Authors. All rights reserved.
- # Use of this source code is governed by a BSD-style license that can be
- # found in the LICENSE file.
- """Uploads files to Google Storage and output DEPS blob."""
- import hashlib
- import optparse
- import os
- import json
- import tempfile
- import re
- import sys
- import tarfile
- from download_from_google_storage import Gsutil
- from download_from_google_storage import GSUTIL_DEFAULT_PATH
- from typing import List
# Placeholder reported in place of a generation number when one could not be
# parsed from gsutil's output. The two literals are joined by implicit
# concatenation, so the first one must end with a space.
MISSING_GENERATION_MSG = (
    'missing generation number, please retrieve from Cloud Storage '
    'before saving to DEPS')
# Usage text handed to optparse; "%prog" is replaced by the program name.
# NOTE(review): the "find" example below emits NUL-separated paths (-print0)
# but does not pass -0/--use_null_terminator, and it selects regular files
# (-type f) while multi-entry input is validated as directories in main().
# Confirm the example still works as written.
USAGE_STRING = """%prog [options] target [target2 ...].
Target(s) is the files or directories intended to be uploaded to Google Storage.
If a single target is a directory, it will be compressed and uploaded as a
tar.gz file.
If target is "-", then a list of directories will be taken from standard input.
The list of directories will be compressed together and uploaded as one tar.gz
file.
Example usage
------------
./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name hello_world.txt
./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name my_dir1
./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name my_dir1 my_dir2
Scan the current directory and upload all files larger than 1MB:
find . -name .svn -prune -o -size +1000k -type f -print0 |
./upload_to_google_storage_first_class.py --bucket gsutil-upload-playground
--object-name my_object_name -
"""
def get_targets(args: List[str], parser: optparse.OptionParser,
                use_null_terminator: bool) -> List[str]:
    """Get target(s) to upload to GCS.

    Args:
        args: Positional command-line arguments.
        parser: Option parser, used to report a usage error (which exits
            the process) when no target was given.
        use_null_terminator: When reading targets from stdin, split on NUL
            characters instead of line boundaries.

    Returns:
        |args| unchanged, unless it is exactly ['-'], in which case the
        non-empty entries read from standard input are returned.
    """
    if not args:
        parser.error('Missing target.')

    if len(args) != 1 or args[0] != '-':
        return args

    # Take stdin as a newline or null separated list of files.
    raw = sys.stdin.read()
    entries = raw.split('\0') if use_null_terminator else raw.splitlines()
    # "find ... -print0" terminates every path with NUL, so splitting leaves
    # a trailing empty string; blank lines behave the same way. An empty
    # path is never a valid target, so drop those entries.
    return [entry for entry in entries if entry]
def create_archive(dirs: List[str]) -> str:
    """Compress the given directories into one temporary tar.gz file.

    Args:
        dirs: Paths to add to the archive, each added under the name given.

    Returns:
        Path of the newly created .tar.gz file. The caller owns the file.
    """
    # mkstemp() hands back an already-open descriptor along with the name.
    # Only the name is needed here, so close the descriptor right away
    # instead of leaking it.
    fd, filename = tempfile.mkstemp(suffix='.tar.gz')
    os.close(fd)
    try:
        with tarfile.open(filename, 'w:gz') as tar:
            for d in dirs:
                tar.add(d)
    except Exception:
        # Do not leave a half-written archive behind on failure.
        os.remove(filename)
        raise
    return filename
def validate_archive_dirs(dirs: List[str]) -> bool:
    """Validate the list of directories to be archived.

    Every entry must be a directory located directly below the current
    working directory. An entry that is a symlink is accepted only when it
    resolves to a location inside one of the non-symlink entries.

    Args:
        dirs: Candidate directory names, relative to the current directory.

    Returns:
        True if every entry is acceptable (or |dirs| is empty), else False.
    """
    # Names of the directories directly below cwd; computed once rather
    # than once per entry.
    top_level = set(next(os.walk('.'), ('.', [], []))[1])
    # Resolved locations of the real (non-symlink) entries. Symlinked
    # entries are checked against these, never against themselves.
    real_roots = [os.path.realpath(d) for d in dirs if not os.path.islink(d)]

    for d in dirs:
        # We don't allow .. in paths in our archives.
        if d == '..':
            return False
        # We only allow dirs.
        if not os.path.isdir(d):
            return False
        # Symlinks must point to a target inside the dirs.
        if os.path.islink(d):
            target = os.path.realpath(d)
            if not any(_is_within(target, root) for root in real_roots):
                return False
        # The subdirectories we are archiving must all be just below cwd.
        if d not in top_level:
            return False
    return True


def _is_within(path: str, root: str) -> bool:
    """Return True if absolute |path| is |root| itself or lies beneath it."""
    try:
        return os.path.commonpath([path, root]) == root
    except ValueError:
        # commonpath() refuses paths that cannot share a prefix (for
        # example different drives); such paths are not nested.
        return False
def get_sha256sum(filename: str) -> str:
    """Return the hex-encoded SHA-256 digest of the contents of a file."""
    block_size = 1024 * 1024
    digest = hashlib.sha256()
    with open(filename, 'rb') as stream:
        # Feed the hash 1 MiB at a time so the file never has to be held in
        # memory in full; the empty read at end-of-file ends the iteration.
        for block in iter(lambda: stream.read(block_size), b''):
            digest.update(block)
    return digest.hexdigest()
def upload_to_google_storage(file: str, base_url: str, object_name: str,
                             gsutil: Gsutil, force: bool, gzip: str,
                             dry_run: bool) -> str:
    """Upload file to GCS.

    Args:
        file: Local path of the file to upload.
        base_url: Destination prefix; the object URL is base_url/object_name.
        object_name: Name of the destination object.
        gsutil: Wrapper used to invoke gsutil.
        force: Skip the check for an already existing remote object.
        gzip: Value passed to gsutil's "cp -z" option; falsy to omit it.
        dry_run: Only perform the existence check; do not upload.

    Returns:
        The generation number parsed from gsutil's status output, as a
        string, or MISSING_GENERATION_MSG when it could not be parsed.
        Returns None on a dry run.

    Raises:
        Exception: If the object already exists (and |force| is not set) or
            if the upload command exits with a non-zero code.
    """
    file_url = '%s/%s' % (base_url, object_name)
    # With --force the answer would be ignored anyway, so only ask gsutil
    # whether the object exists when that can change the outcome.
    if not force and gsutil.check_call('ls', file_url)[0] == 0:
        # The object is listed; refuse to overwrite it when the detailed
        # listing reports an ETag for it.
        _, out, _ = gsutil.check_call_with_retries('ls', '-L', file_url)
        etag_match = re.search(r'ETag:\s+\S+', out)
        if etag_match:
            raise Exception('File with url %s already exists' % file_url)
    if dry_run:
        return None

    print("Uploading %s as %s" % (file, file_url))
    gsutil_args = ['-h', 'Cache-Control:public, max-age=31536000', 'cp', '-v']
    if gzip:
        gsutil_args.extend(['-z', gzip])
    gsutil_args.extend([file, file_url])
    code, _, err = gsutil.check_call_with_retries(*gsutil_args)
    if code != 0:
        raise Exception(
            code, 'Encountered error on uploading %s to %s\n%s' %
            (file, file_url, err))

    # Raw string: "\d" must reach the regex engine as-is rather than being
    # treated as a (deprecated) string escape.
    pattern = re.compile(re.escape(file_url) + r'#(?P<generation>\d+)')
    # The generation number is printed as part of the progress / status info
    # which gsutil outputs to stderr to keep separated from any final output
    # data.
    for line in err.strip().splitlines():
        m = pattern.search(line)
        if m:
            return m.group('generation')
    print('Warning: generation number could not be parsed from status '
          f'info: {err}')
    return MISSING_GENERATION_MSG
def construct_deps_blob(bucket: str, object_name: str, file: str,
                        generation: str) -> dict:
    """Output a blob hint that would need to be added to a DEPS file.

    Args:
        bucket: Name of the Google Storage bucket.
        object_name: Name of the uploaded object.
        file: Local path of the uploaded file; hashed and measured here.
        generation: Generation number as a string. May also be None or a
            non-numeric placeholder when no generation number is known.

    Returns:
        A dict describing the GCS dependency. 'generation' is an int when
        |generation| is numeric; MISSING_GENERATION_MSG when it is None;
        otherwise the given value is passed through unchanged.
    """
    if generation is None:
        # No upload result (e.g. a dry run): show the placeholder instead
        # of failing in int().
        generation_value = MISSING_GENERATION_MSG
    else:
        try:
            generation_value = int(generation)
        except (TypeError, ValueError):
            # Not a number (e.g. the "missing generation" placeholder);
            # keep it verbatim so the hint can still be printed.
            generation_value = generation

    return {
        'path': {
            'dep_type':
            'gcs',
            'bucket':
            bucket,
            'objects': [{
                'object_name': object_name,
                'sha256sum': get_sha256sum(file),
                'size_bytes': os.path.getsize(file),
                'generation': generation_value,
            }],
        }
    }
def main():
    """Parse the command line, upload the target(s) and print a DEPS hint.

    Returns:
        The result of "gsutil config" when --config is given, otherwise 0.
        Usage errors are reported through the option parser, which exits
        the process.
    """
    parser = optparse.OptionParser(USAGE_STRING)
    parser.add_option('-b',
                      '--bucket',
                      help='Google Storage bucket to upload to.')
    parser.add_option('-p',
                      '--prefix',
                      help='Prefix that goes before object-name (i.e. in '
                      'between bucket and object name).')
    parser.add_option('-o',
                      '--object-name',
                      help='Optional object name of uploaded tar file. '
                      'If empty, the sha256sum will be the object name.')
    parser.add_option('-d',
                      '--dry-run',
                      action='store_true',
                      help='Check if file already exists on GS without '
                      'uploading it and output DEP blob.')
    parser.add_option('-c',
                      '--config',
                      action='store_true',
                      help='Alias for "gsutil config".  Run this if you want '
                      'to initialize your saved Google Storage '
                      'credentials.  This will create a read-only '
                      'credentials file in ~/.boto.depot_tools.')
    parser.add_option('-e', '--boto', help='Specify a custom boto file.')
    parser.add_option('-f',
                      '--force',
                      action='store_true',
                      help='Force upload even if remote file exists.')
    parser.add_option('-g',
                      '--gsutil_path',
                      default=GSUTIL_DEFAULT_PATH,
                      help='Path to the gsutil script.')
    parser.add_option('-0',
                      '--use_null_terminator',
                      action='store_true',
                      help='Use \\0 instead of \\n when parsing '
                      'the file list from stdin.  This is useful if the input '
                      'is coming from "find ... -print0".')
    parser.add_option('-z',
                      '--gzip',
                      metavar='ext',
                      help='For files which end in <ext> gzip them before '
                      'upload. '
                      'ext is a comma-separated list')
    (options, args) = parser.parse_args()

    # An upload needs a well-formed bucket name; report that as a usage
    # error up front rather than through an assert (stripped under -O) or a
    # TypeError on a missing value. --config does not use the bucket.
    if not options.config:
        if not options.bucket:
            parser.error('Missing --bucket.')
        if '/' in options.bucket:
            parser.error('Slashes not allowed in bucket name')

    # Enumerate our inputs.
    input_filenames = get_targets(args, parser, options.use_null_terminator)

    # Allow uploading the entire directory
    if len(input_filenames) == 1 and input_filenames[0] in ('.', './'):
        input_filenames = next(os.walk('.'))[1]

    # Stdin or the directory expansion above may have produced nothing.
    if not input_filenames:
        parser.error('Missing target.')

    archive_created = False
    if len(input_filenames) > 1 or os.path.isdir(input_filenames[0]):
        if not validate_archive_dirs(input_filenames):
            # parser.error() exits the process.
            parser.error(
                'Only directories just below cwd are valid entries. '
                'Entries cannot contain .. and entries can not be symlinks. '
                'Entries was %s' % input_filenames)
        upload_path = create_archive(input_filenames)
        archive_created = True
    else:
        upload_path = input_filenames[0]

    try:
        object_name = options.object_name
        if not object_name:
            object_name = get_sha256sum(upload_path)
        if options.prefix:
            object_name = f'{options.prefix}/{object_name}'

        # Make sure we can find a working instance of gsutil.
        gsutil = _find_gsutil(options.gsutil_path, options.boto)
        if not gsutil:
            parser.error('gsutil not found in %s, bad depot_tools checkout?' %
                         options.gsutil_path)

        # Passing in -c/--config will run our copy of GSUtil, then quit.
        if options.config:
            print('===Note from depot_tools===')
            print('If you do not have a project ID, enter "0" when asked for '
                  'one.')
            print('===End note from depot_tools===')
            print()
            gsutil.check_call('version')
            return gsutil.call('config')

        base_url = f'gs://{options.bucket}'
        generation = upload_to_google_storage(upload_path, base_url,
                                              object_name, gsutil,
                                              options.force, options.gzip,
                                              options.dry_run)
        print(
            json.dumps(construct_deps_blob(options.bucket, object_name,
                                           upload_path, generation),
                       indent=2))
        return 0
    finally:
        # The archive is a temporary file created by this run; remove it on
        # every exit path. Cleanup is best-effort only.
        if archive_created:
            try:
                os.remove(upload_path)
            except OSError:
                pass


def _find_gsutil(gsutil_path: str, boto_path):
    """Return a Gsutil for |gsutil_path|, else for the first one on PATH."""
    if os.path.exists(gsutil_path):
        return Gsutil(gsutil_path, boto_path=boto_path)
    for directory in os.environ.get('PATH', '').split(os.pathsep):
        if not directory:
            continue
        candidate = os.path.join(directory, 'gsutil')
        # The first match wins, as in a shell lookup. isfile() is also safe
        # for PATH entries that are missing or are not directories.
        if os.path.isfile(candidate):
            return Gsutil(candidate, boto_path=boto_path)
    return None
# Script entry point: exit with main()'s result, or with status 1 after a
# short notice when the user interrupts the run.
if __name__ == '__main__':
    try:
        exit_status = main()
    except KeyboardInterrupt:
        sys.stderr.write('interrupted\n')
        exit_status = 1
    sys.exit(exit_status)
|