# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import division

import base64

try:
  import urlparse
except ImportError:  # pragma: no cover
  import urllib.parse as urlparse

from recipe_engine import recipe_api


class Gitiles(recipe_api.RecipeApi):
  """Module for polling a git repository using the Gitiles web interface."""

  def _fetch(self, url, step_name, fmt, attempts=None, add_json_log=True,
             log_limit=None, log_start=None, extract_to=None, **kwargs):
    """Fetches information from Gitiles.

    Args:
      * fmt (str): one of ('text', 'json', 'archive'). Instructs the
        underlying gitiles_client tool how to process the HTTP response:
          * text - the response is base64 encoded.
          * json - the response is JSON.
          * archive - the response is a compressed tarball; requires
            `extract_to`.
      * extract_to (Path): when fmt == 'archive', instructs gitiles_client to
        extract the archive to this non-existent folder.
      * log_limit (int): for log URLs, limit the number of results. None
        implies one page, as returned by Gitiles.
      * log_start (str): for log URLs, the start cursor for paging.
      * add_json_log (bool): if True, emit the JSON response to the step log.
    """
    assert fmt in ('json', 'text', 'archive')

    args = [
        '--json-file', self.m.json.output(add_json_log=add_json_log),
        '--url', url,
        '--format', fmt,
    ]
    if fmt == 'archive':
      assert extract_to is not None, 'archive format requires extract_to'
      args.extend(['--extract-to', extract_to])
    if attempts:
      args.extend(['--attempts', attempts])
    if log_limit is not None:
      args.extend(['--log-limit', log_limit])
    if log_start is not None:
      args.extend(['--log-start', log_start])
    accept_statuses = kwargs.pop('accept_statuses', None)
    if accept_statuses:
      args.extend([
          '--accept-statuses',
          ','.join([str(s) for s in accept_statuses])])
    return self.m.python(
        step_name, self.resource('gerrit_client.py'), args, **kwargs)
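
  # For illustration: given the argument list built above, a refs() call
  # below runs the bundled gerrit_client.py resource roughly like this (the
  # JSON output path is a placeholder supplied by the recipe engine):
  #
  #   gerrit_client.py --json-file /path/to/out.json \
  #       --url https://<host>/<repo>/+refs --format json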

  def refs(self, url, step_name='refs', attempts=None):
    """Returns a list of refs in the remote repository."""
    step_result = self._fetch(
        self.m.url.join(url, '+refs'),
        step_name,
        fmt='json',
        attempts=attempts)
    refs = sorted(str(ref) for ref in step_result.json.output)
    step_result.presentation.logs['refs'] = refs
    return refs
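
  # A minimal usage sketch for refs(); the repository URL is a placeholder:
  #
  #   refs = api.gitiles.refs('https://chromium.googlesource.com/infra/infra')
  #   if 'refs/heads/master' in refs:
  #     ...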

  def log(self, url, ref, limit=0, cursor=None,
          step_name=None, attempts=None, **kwargs):
    """Returns the most recent commits under the given ref with properties.

    Args:
      * url (str): URL of the remote repository.
      * ref (str): Name of the desired ref (see Gitiles.refs).
      * limit (int): Number of commits to limit the fetching to.
        Gitiles does not return all commits in one call; instead paging is
        used. 0 means return whatever the first Gitiles response contains.
        Otherwise, paging will be used to fetch at least this many
        commits, but all fetched commits will be returned.
      * cursor (str or None): The paging cursor used to fetch the next page.
      * step_name (str): Custom name for this step (optional).

    Returns:
      A tuple (commits, cursor).
      `commits` is a list of commits (as Gitiles dict structures) in reverse
      chronological order. The number of commits may be higher than the
      `limit` argument.
      `cursor` can be used in a subsequent call to log for paging. If None,
      there are no more commits to fetch.
    """
    assert limit >= 0
    step_name = step_name or 'gitiles log: %s%s' % (
        ref, ' from %s' % cursor if cursor else '')

    step_result = self._fetch(
        self.m.url.join(url, '+log/%s' % ref),
        step_name,
        log_limit=limit,
        log_start=cursor,
        attempts=attempts,
        fmt='json',
        add_json_log=True,
        **kwargs)
    # The output is a JSON dict with a "log" key, which is a list of commit
    # dicts, each containing information about that commit.
    commits = step_result.json.output['log']
    cursor = step_result.json.output.get('next')

    step_result.presentation.step_text = (
        '<br />%d commits fetched' % len(commits))
    return commits, cursor
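
  # Paging sketch for log(): keep feeding the returned cursor back in until
  # it comes back as None. The repository URL below is a placeholder.
  #
  #   commits, cursor = api.gitiles.log(
  #       repo_url, 'refs/heads/master', limit=100)
  #   while cursor:
  #     more, cursor = api.gitiles.log(
  #         repo_url, 'refs/heads/master', limit=100, cursor=cursor)
  #     commits.extend(more)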

  def commit_log(self, url, commit, step_name=None, attempts=None):
    """Returns: (dict) the Gitiles commit log structure for a given commit.

    Args:
      * url (str): The base repository URL.
      * commit (str): The commit hash.
      * step_name (str): If not None, override the step name.
      * attempts (int): Number of times to try the request before failing.
    """
    step_name = step_name or 'commit log: %s' % commit

    commit_url = '%s/+/%s' % (url, commit)
    step_result = self._fetch(commit_url, step_name, attempts=attempts,
                              fmt='json')
    return step_result.json.output
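
  # Usage sketch for commit_log(); '<commit hash>' is a placeholder, and the
  # returned fields follow the Gitiles commit JSON structure:
  #
  #   info = api.gitiles.commit_log(repo_url, '<commit hash>')
  #   author_email = info['author']['email']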

  def download_file(self, repository_url, file_path, branch='master',
                    step_name=None, attempts=None, **kwargs):
    """Downloads raw file content from a Gitiles repository.

    Args:
      * repository_url (str): Full URL to the repository.
      * file_path (str): Relative path to the file from the repository root.
      * branch (str): Branch of the repository.
      * step_name (str): Custom name for this step (optional).
      * attempts (int): Number of times to try the request before failing.

    Returns:
      Raw file content, or None if the response contained no file content.
    """
    fetch_url = self.m.url.join(repository_url, '+/%s/%s' % (branch, file_path))
    step_result = self._fetch(
        fetch_url,
        step_name or 'fetch %s:%s' % (branch, file_path),
        attempts=attempts,
        fmt='text',
        add_json_log=False,
        **kwargs)
    if step_result.json.output['value'] is None:
      return None
    return base64.b64decode(step_result.json.output['value'])
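
  # Usage sketch for download_file(); the URL and path are placeholders.
  # Since the fetch URL has the form `+/<committish>/<path>`, a revision
  # hash should also work as the `branch` argument:
  #
  #   content = api.gitiles.download_file(
  #       'https://chromium.googlesource.com/infra/infra', 'OWNERS',
  #       branch='refs/heads/master')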

  def download_archive(self, repository_url, destination,
                       revision='refs/heads/master'):
    """Downloads an archive of the repo and extracts it to `destination`.

    If the gitiles server attempts to provide a tarball with paths which
    escape `destination`, this function will extract all valid files and then
    raise StepFailure with an attribute `StepFailure.gitiles_skipped_files`
    containing the names of the files that were skipped.

    Args:
      * repository_url (str): Full URL to the repository.
      * destination (Path): Local path to extract the archive to. Must not
        exist prior to this call.
      * revision (str): The ref or revision in the repo to download. Defaults
        to 'refs/heads/master'.
    """
    step_name = 'download %s @ %s' % (repository_url, revision)
    fetch_url = self.m.url.join(repository_url, '+archive/%s.tgz' % (revision,))
    step_result = self._fetch(
        fetch_url,
        step_name,
        fmt='archive',
        add_json_log=False,
        extract_to=destination,
        step_test_data=lambda: self.m.json.test_api.output({
            'extracted': {
                'filecount': 1337,
                'bytes': 7192345,
            },
        })
    )
    self.m.path.mock_add_paths(destination)

    j = step_result.json.output
    if j['extracted']['filecount']:
      stat = j['extracted']
      step_result.presentation.step_text += (
          '<br/>extracted %s files - %.02f MB' % (
              stat['filecount'], stat['bytes'] / (1000.0**2)))
    if j.get('skipped', {}).get('filecount'):
      stat = j['skipped']
      step_result.presentation.step_text += (
          '<br/>SKIPPED %s files - %.02f MB' % (
              stat['filecount'], stat['bytes'] / (1000.0**2)))
      step_result.presentation.logs['skipped files'] = stat['names']
      step_result.presentation.status = self.m.step.FAILURE

      ex = self.m.step.StepFailure(step_name)
      ex.gitiles_skipped_files = stat['names']
      raise ex
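
  # Usage sketch for download_archive(), including the skipped-files failure
  # described in its docstring; the paths and URL are placeholders:
  #
  #   dest = api.path['start_dir'].join('infra')
  #   try:
  #     api.gitiles.download_archive(
  #         'https://chromium.googlesource.com/infra/infra', dest)
  #   except api.step.StepFailure as ex:
  #     bad_paths = getattr(ex, 'gitiles_skipped_files', [])
  #     raise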

  def parse_repo_url(self, repo_url):
    """Returns (host, project) pair.

    Returns (None, None) if repo_url is not recognized.
    """
    return parse_repo_url(repo_url)

  def unparse_repo_url(self, host, project):
    """Generates a Gitiles repo URL. See also parse_repo_url."""
    return unparse_repo_url(host, project)

  def canonicalize_repo_url(self, repo_url):
    """Returns a canonical form of repo_url. If not recognized, returns as is.
    """
    if repo_url:
      host, project = parse_repo_url(repo_url)
      if host and project:
        repo_url = unparse_repo_url(host, project)
    return repo_url
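
  # For example, canonicalize_repo_url() maps authenticated and
  # .git-suffixed forms onto one canonical URL:
  #
  #   'https://chromium.googlesource.com/a/infra/infra.git'
  #     -> 'https://chromium.googlesource.com/infra/infra'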


def parse_http_host_and_path(url):
  # Copied from
  # https://chromium.googlesource.com/infra/luci/recipes-py/+/809e57935211b3fcb802f74a7844d4f36eff6b87/recipe_modules/buildbucket/util.py
  parsed = urlparse.urlparse(url)
  if not parsed.scheme:
    parsed = urlparse.urlparse('https://' + url)
  if (parsed.scheme in ('http', 'https') and
      not parsed.params and
      not parsed.query and
      not parsed.fragment):
    return parsed.netloc, parsed.path
  return None, None


def parse_repo_url(repo_url):
  """Returns (host, project) pair.

  Returns (None, None) if repo_url is not recognized.
  """
  # Adapted from
  # https://chromium.googlesource.com/infra/luci/recipes-py/+/809e57935211b3fcb802f74a7844d4f36eff6b87/recipe_modules/buildbucket/util.py
  host, project = parse_http_host_and_path(repo_url)
  if not host or not project or '+' in project.split('/'):
    return None, None
  project = project.strip('/')
  if project.startswith('a/'):
    project = project[len('a/'):]
  if project.endswith('.git'):
    project = project[:-len('.git')]
  return host, project
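
# A sketch of the module-level parse/unparse round trip, traced from the
# functions above:
#
#   parse_repo_url('https://chromium.googlesource.com/a/infra/infra.git')
#     -> ('chromium.googlesource.com', 'infra/infra')
#   parse_repo_url('ssh://host/repo')
#     -> (None, None)
#   unparse_repo_url('chromium.googlesource.com', 'infra/infra')
#     -> 'https://chromium.googlesource.com/infra/infra'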


def unparse_repo_url(host, project):
  return 'https://%s/%s' % (host, project)