api.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. # Copyright 2018 The Chromium Authors. All rights reserved.
  2. # Use of this source code is governed by a BSD-style license that can be
  3. # found in the LICENSE file.
  4. import base64
  5. import urlparse
  6. from recipe_engine import recipe_api
class Gitiles(recipe_api.RecipeApi):
  """Module for polling a git repository using the Gitiles web interface."""

  def _fetch(self, url, step_name, fmt, attempts=None, add_json_log=True,
             log_limit=None, log_start=None, extract_to=None, **kwargs):
    """Fetches information from Gitiles.

    Runs the bundled `gerrit_client.py` resource as a python step and returns
    the step result; callers read the parsed response via `json.output`.

    Arguments:
      url (str): The Gitiles URL to fetch.
      step_name (str): Name for the generated step.
      fmt (str): one of ('text', 'json', 'archive'). Instructs the underlying
        gitiles_client tool how to process the HTTP response.

        * text - implies the response is base64 encoded
        * json - implies the response is JSON
        * archive - implies the response is a compressed tarball; requires
          `extract_to`.
      attempts (int or None): Number of times to try the request; a falsy
        value leaves the client's own default in effect.
      add_json_log: if True, will spill out json into log.
      log_limit: for log URLs, limit number of results. None implies 1 page,
        as returned by Gitiles.
      log_start: for log URLs, the start cursor for paging.
      extract_to (Path): When fmt=='archive', instructs gitiles_client to
        extract the archive to this non-existent folder.
      **kwargs: Forwarded to the underlying python step, except for
        'accept_statuses' (see below).
    """
    assert fmt in ('json', 'text', 'archive')
    args = [
        '--json-file', self.m.json.output(add_json_log=add_json_log),
        '--url', url,
        '--format', fmt,
    ]
    if fmt == 'archive':
      assert extract_to is not None, 'archive format requires extract_to'
      args.extend(['--extract-to', extract_to])
    if attempts:
      args.extend(['--attempts', attempts])
    if log_limit is not None:
      args.extend(['--log-limit', log_limit])
    if log_start is not None:
      args.extend(['--log-start', log_start])
    # 'accept_statuses' is a client flag, not a step kwarg: pop it here so the
    # remaining kwargs can be passed through to the python step unchanged.
    accept_statuses = kwargs.pop('accept_statuses', None)
    if accept_statuses:
      args.extend([
          '--accept-statuses',
          ','.join([str(s) for s in accept_statuses])])
    return self.m.python(
        step_name, self.resource('gerrit_client.py'), args, **kwargs)

  def refs(self, url, step_name='refs', attempts=None):
    """Returns a sorted list of ref names (strs) in the remote repository.

    Args:
      url (str): URL of the remote repository.
      step_name (str): Custom name for this step (optional).
      attempts (int): Number of times to try the request before failing.
    """
    step_result = self._fetch(
        self.m.url.join(url, '+refs'),
        step_name,
        fmt='json',
        attempts=attempts)
    # Iterating the JSON output yields the ref names; presumably it is a dict
    # keyed by ref name — confirm against gerrit_client.py's output format.
    refs = sorted(str(ref) for ref in step_result.json.output)
    step_result.presentation.logs['refs'] = refs
    return refs

  def log(self, url, ref, limit=0, cursor=None,
          step_name=None, attempts=None, **kwargs):
    """Returns the most recent commits under the given ref with properties.

    Args:
      url (str): URL of the remote repository.
      ref (str): Name of the desired ref (see Gitiles.refs).
      limit (int): Number of commits to limit the fetching to.
        Gitiles does not return all commits in one call; instead paging is
        used. 0 implies to return whatever first gerrit responds with.
        Otherwise, paging will be used to fetch at least this many
        commits, but all fetched commits will be returned.
      cursor (str or None): The paging cursor used to fetch the next page.
      step_name (str): Custom name for this step (optional).
      attempts (int): Number of times to try the request before failing.
      **kwargs: Forwarded to _fetch.

    Returns:
      A tuple of (commits, cursor).
      Commits are a list of commits (as Gitiles dict structure) in reverse
      chronological order. The number of commits may be higher than limit
      argument.
      Cursor can be used for subsequent calls to log for paging. If None,
      signals that there are no more commits to fetch.
    """
    assert limit >= 0
    step_name = step_name or 'gitiles log: %s%s' % (
        ref, ' from %s' % cursor if cursor else '')
    step_result = self._fetch(
        self.m.url.join(url, '+log/%s' % ref),
        step_name,
        log_limit=limit,
        log_start=cursor,
        attempts=attempts,
        fmt='json',
        add_json_log=True,
        **kwargs)
    # The output is formatted as a JSON dict with a "log" key. The "log" key
    # is a list of commit dicts, which contain information about the commit.
    commits = step_result.json.output['log']
    cursor = step_result.json.output.get('next')
    step_result.presentation.step_text = (
        '<br />%d commits fetched' % len(commits))
    return commits, cursor

  def commit_log(self, url, commit, step_name=None, attempts=None):
    """Returns: (dict) the Gitiles commit log structure for a given commit.

    Args:
      url (str): The base repository URL.
      commit (str): The commit hash.
      step_name (str): If not None, override the step name.
      attempts (int): Number of times to try the request before failing.
    """
    step_name = step_name or 'commit log: %s' % commit
    commit_url = '%s/+/%s' % (url, commit)
    step_result = self._fetch(commit_url, step_name, attempts=attempts,
                              fmt='json')
    return step_result.json.output

  def download_file(self, repository_url, file_path, branch='master',
                    step_name=None, attempts=None, **kwargs):
    """Downloads raw file content from a Gitiles repository.

    Args:
      repository_url (str): Full URL to the repository.
      branch (str): Branch of the repository.
      file_path (str): Relative path to the file from the repository root.
      step_name (str): Custom name for this step (optional).
      attempts (int): Number of times to try the request before failing.

    Returns:
      Raw file content, or None when the fetch produced no value.
    """
    fetch_url = self.m.url.join(repository_url, '+/%s/%s' % (branch, file_path))
    step_result = self._fetch(
        fetch_url,
        step_name or 'fetch %s:%s' % (branch, file_path,),
        attempts=attempts,
        fmt='text',
        add_json_log=False,
        **kwargs)
    if step_result.json.output['value'] is None:
      return None
    # 'text' format responses are base64 encoded (see _fetch docstring).
    return base64.b64decode(step_result.json.output['value'])

  def download_archive(self, repository_url, destination,
                       revision='refs/heads/master'):
    """Downloads an archive of the repo and extracts it to `destination`.

    If the gitiles server attempts to provide a tarball with paths which escape
    `destination`, this function will extract all valid files and then
    raise StepFailure with an attribute `StepFailure.gitiles_skipped_files`
    containing the names of the files that were skipped.

    Args:
      repository_url (str): Full URL to the repository
      destination (Path): Local path to extract the archive to. Must not exist
        prior to this call.
      revision (str): The ref or revision in the repo to download. Defaults to
        'refs/heads/master'.
    """
    step_name = 'download %s @ %s' % (repository_url, revision)
    fetch_url = self.m.url.join(repository_url, '+archive/%s.tgz' % (revision,))
    step_result = self._fetch(
        fetch_url,
        step_name,
        fmt='archive',
        add_json_log=False,
        extract_to=destination,
        # Default simulation data so recipe tests see a plausible extraction
        # summary without real network traffic.
        step_test_data=lambda: self.m.json.test_api.output({
          'extracted': {
            'filecount': 1337,
            'bytes': 7192345,
          },
        })
    )
    # Tell the simulated filesystem that `destination` now exists.
    self.m.path.mock_add_paths(destination)
    j = step_result.json.output
    if j['extracted']['filecount']:
      stat = j['extracted']
      step_result.presentation.step_text += (
        '<br/>extracted %s files - %.02f MB' % (
          stat['filecount'], stat['bytes'] / (1000.0**2)))
    # Entries the client refused to extract (paths escaping `destination`):
    # report them, mark the step failed, and raise with the skipped names.
    if j.get('skipped', {}).get('filecount'):
      stat = j['skipped']
      step_result.presentation.step_text += (
        '<br/>SKIPPED %s files - %.02f MB' % (
          stat['filecount'], stat['bytes'] / (1000.0**2)))
      step_result.presentation.logs['skipped files'] = stat['names']
      step_result.presentation.status = self.m.step.FAILURE
      ex = self.m.step.StepFailure(step_name)
      ex.gitiles_skipped_files = stat['names']
      raise ex

  def parse_repo_url(self, repo_url):
    """Returns (host, project) pair.

    Returns (None, None) if repo_url is not recognized.
    """
    # Delegates to the module-level helper of the same name.
    return parse_repo_url(repo_url)

  def unparse_repo_url(self, host, project):
    """Generates a Gitiles repo URL. See also parse_repo_url."""
    return unparse_repo_url(host, project)

  def canonicalize_repo_url(self, repo_url):
    """Returns a canonical form of repo_url. If not recognized, returns as is.
    """
    # Round-trip through parse/unparse to normalize scheme, '/a/' prefix and
    # '.git' suffix; anything unrecognized is passed through unchanged.
    if repo_url:
      host, project = parse_repo_url(repo_url)
      if host and project:
        repo_url = unparse_repo_url(host, project)
    return repo_url
  196. def parse_http_host_and_path(url):
  197. # Copied from https://chromium.googlesource.com/infra/luci/recipes-py/+/809e57935211b3fcb802f74a7844d4f36eff6b87/recipe_modules/buildbucket/util.py
  198. parsed = urlparse.urlparse(url)
  199. if not parsed.scheme:
  200. parsed = urlparse.urlparse('https://' + url)
  201. if (parsed.scheme in ('http', 'https') and
  202. not parsed.params and
  203. not parsed.query and
  204. not parsed.fragment):
  205. return parsed.netloc, parsed.path
  206. return None, None
  207. def parse_repo_url(repo_url):
  208. """Returns (host, project) pair.
  209. Returns (None, None) if repo_url is not recognized.
  210. """
  211. # Adapted from https://chromium.googlesource.com/infra/luci/recipes-py/+/809e57935211b3fcb802f74a7844d4f36eff6b87/recipe_modules/buildbucket/util.py
  212. host, project = parse_http_host_and_path(repo_url)
  213. if not host or not project or '+' in project.split('/'):
  214. return None, None
  215. project = project.strip('/')
  216. if project.startswith('a/'):
  217. project = project[len('a/'):]
  218. if project.endswith('.git'):
  219. project = project[:-len('.git')]
  220. return host, project
  221. def unparse_repo_url(host, project):
  222. return 'https://%s/%s' % (host, project)