gerrit_client.py

#!/usr/bin/env vpython3
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Simple client for the Gerrit REST API.

Example usage:
  ./gerrit_client.py -j /tmp/out.json -f json \
      -u https://chromium.googlesource.com/chromium/src/+log
"""

from __future__ import print_function

import argparse
import json
import logging
import os
import sys
import tarfile
import time

try:
  from urllib import urlencode
  import urlparse
except ImportError:  # pragma: no cover
  from urllib.parse import urlencode
  import urllib.parse as urlparse

DEPOT_TOOLS = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir,
                 os.pardir))
sys.path.insert(0, DEPOT_TOOLS)

from gerrit_util import CreateHttpConn, ReadHttpResponse, ReadHttpJsonResponse
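
# Note: the gerrit_util helpers behave as follows in this file (assumed from
# their usage here, not verified against gerrit_util itself):
# CreateHttpConn(host, path) builds an HTTP request object,
# ReadHttpResponse(conn) returns a file-like response body, and
# ReadHttpJsonResponse(conn) parses that body as JSON.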


def reparse_url(parsed_url, query_params):
  """Returns a copy of `parsed_url` with its query replaced by `query_params`."""
  return urlparse.ParseResult(
      scheme=parsed_url.scheme,
      netloc=parsed_url.netloc,
      path=parsed_url.path,
      params=parsed_url.params,
      fragment=parsed_url.fragment,
      query=urlencode(query_params, doseq=True))
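

# For example (illustrative):
#   reparse_url(urlparse.urlparse('https://host/repo/+log?format=text'),
#               {'format': 'json'})
# yields a ParseResult whose geturl() is 'https://host/repo/+log?format=json'.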


def gitiles_get(parsed_url, handler, attempts):
  """Fetches `parsed_url` via `handler`, retrying with exponential backoff.

  Gives up and re-raises after `attempts` failed tries.
  """
  # This insanity is due to CreateHttpConn interface :(
  host = parsed_url.netloc
  path = parsed_url.path
  if parsed_url.query:
    path += '?%s' % (parsed_url.query, )

  retry_delay_seconds = 1
  attempt = 1
  while True:
    try:
      return handler(CreateHttpConn(host, path))
    except Exception as e:
      if attempt >= attempts:
        raise
      logging.exception('Failed to perform Gitiles operation: %s', e)

    # Retry from previous loop.
    logging.error('Sleeping %d seconds before retry (%d/%d)...',
                  retry_delay_seconds, attempt, attempts)
    time.sleep(retry_delay_seconds)
    retry_delay_seconds *= 2
    attempt += 1
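

# With attempts=3, for example, a persistently failing request sleeps 1s after
# the first failure and 2s after the second; the third failure is raised to
# the caller.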


def fetch_log_with_paging(query_params, limit, fetch):
  """Fetches a log, possibly requesting multiple pages to do so.

  Args:
    query_params (dict): Parameters to use in the request.
    limit (int): Minimum number of commits to fetch, if available.
    fetch (function): Function to use to make the requests.

  Returns:
    Dict with key "log", whose value is a list of commits.
  """
  # The log API returns {'log': [list of commits], 'next': hash}.
  last_result = fetch(query_params)
  commits = last_result['log']
  while last_result.get('next') and len(commits) < limit:
    query_params['s'] = last_result.get('next')
    last_result = fetch(query_params)
    # The first commit in `last_result` is not necessarily the parent of the
    # last commit in the result so far! This is because the log command can be
    # scoped to a single file object, for example:
    #   https://gerrit.googlesource.com/gitiles/+log/1c21279f337da8130/COPYING
    # Even when getting the log for the whole repository, there could be merge
    # commits.
    commits.extend(last_result['log'])
  # Use the 'next' field (if any) from `last_result`, but the commits
  # aggregated from all the results. This essentially imitates paging with at
  # least `limit` page size.
  last_result['log'] = commits
  logging.debug(
      'fetched %d commits, next: %s.', len(commits),
      last_result.get('next'))
  return last_result
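

# Illustrative paging flow, with limit=150 and 100-commit pages:
#   fetch(...)                -> {'log': [100 commits], 'next': 'abc...'}
#   fetch(... with s='abc...') -> {'log': [100 commits], 'next': 'def...'}
# returns {'log': [200 commits], 'next': 'def...'}.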


def main(arguments):
  """Fetches from Gitiles as described by `arguments`; returns an exit code."""
  parser = create_argparser()
  args = parser.parse_args(arguments)

  if args.extract_to and args.format != 'archive':
    parser.error('--extract-to requires --format=archive')
  if not args.extract_to and args.format == 'archive':
    parser.error('--format=archive requires --extract-to')

  if args.extract_to:
    # Make sure it is absolute and ends with '/'.
    args.extract_to = os.path.join(os.path.abspath(args.extract_to), '')
    os.makedirs(args.extract_to)

  parsed_url = urlparse.urlparse(args.url)
  if not parsed_url.scheme.startswith('http'):
    parser.error('Invalid URI scheme (expected http or https): %s' % args.url)

  query_params = {}
  if parsed_url.query:
    query_params.update(urlparse.parse_qs(parsed_url.query))
  # Force the format specified on command-line.
  if query_params.get('format'):
    parser.error('URL must not contain format; use --format command line flag '
                 'instead.')
  query_params['format'] = args.format

  kwargs = {}
  accept_statuses = frozenset([int(s) for s in args.accept_statuses.split(',')])
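  # e.g. --accept-statuses=200,404 yields frozenset({200, 404}) here.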
  if accept_statuses:
    kwargs['accept_statuses'] = accept_statuses

  # Choose handler.
  if args.format == 'json':
    def handler(conn):
      return ReadHttpJsonResponse(conn, **kwargs)
  elif args.format == 'text':
    # Text fetching will pack the text into structured JSON.
    def handler(conn):
      # Wrap in a structured JSON for export to recipe module.
      return {
          'value': ReadHttpResponse(conn, **kwargs).read() or None,
      }
  elif args.format == 'archive':
    # Archive fetching hooks the result to tarfile extraction. This
    # implementation is able to do a streaming extraction operation without
    # having to buffer the entire tarfile.
    def handler(conn):
      ret = {
          'extracted': {
              'filecount': 0,
              'bytes': 0,
          },
          'skipped': {
              'filecount': 0,
              'bytes': 0,
              'names': [],
          }
      }
      fileobj = ReadHttpResponse(conn, **kwargs)
      with tarfile.open(mode='r|*', fileobj=fileobj) as tf:
        # Monkeypatch the TarFile object to allow printing messages and
        # collecting stats for each extracted file. extractall makes a single
        # linear pass over the tarfile, which is compatible with
        # ReadHttpResponse; other naive implementations (such as `getmembers`)
        # do random access over the file and would require buffering the whole
        # thing (!!).
        em = tf._extract_member

        def _extract_member(tarinfo, targetpath):
          if not os.path.abspath(targetpath).startswith(args.extract_to):
            print('Skipping %s' % (tarinfo.name,))
            ret['skipped']['filecount'] += 1
            ret['skipped']['bytes'] += tarinfo.size
            ret['skipped']['names'].append(tarinfo.name)
            return
          print('Extracting %s' % (tarinfo.name,))
          ret['extracted']['filecount'] += 1
          ret['extracted']['bytes'] += tarinfo.size
          return em(tarinfo, targetpath)

        tf._extract_member = _extract_member
        tf.extractall(args.extract_to)
      return ret
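
  # Illustrative shape of the archive handler's result (counts depend on the
  # fetched archive):
  #   {'extracted': {'filecount': 2, 'bytes': 1024},
  #    'skipped': {'filecount': 0, 'bytes': 0, 'names': []}}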

  if args.log_start:
    query_params['s'] = args.log_start

  def fetch(query_params):
    parsed_url_with_query = reparse_url(parsed_url, query_params)
    result = gitiles_get(parsed_url_with_query, handler, args.attempts)
    if not args.quiet:
      logging.info('Read from %s: %s', parsed_url_with_query.geturl(), result)
    return result

  if args.log_limit:
    if args.format != 'json':
      parser.error('--log-limit works with json format only')
    result = fetch_log_with_paging(query_params, args.log_limit, fetch)
  else:
    # Either not a log request, or don't care about paging.
    # So, just return whatever is fetched the first time.
    result = fetch(query_params)

  with open(args.json_file, 'w') as json_file:
    json.dump(result, json_file)
  return 0


def create_argparser():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '-j', '--json-file',
      help='Path to json file for output.')
  parser.add_argument(
      '--extract-to',
      help='Local path to extract the archive to. Must not exist.')
  parser.add_argument(
      '-f', '--format', required=True, choices=('json', 'text', 'archive'))
  parser.add_argument(
      '-u', '--url', required=True,
      help='URL of gitiles. For example, '
      'https://chromium.googlesource.com/chromium/src/+refs. '
      'Insert a/ after the domain for authenticated access.')
  parser.add_argument(
      '-a', '--attempts', type=int, default=1,
      help='The number of attempts to make (with exponential backoff) before '
      'failing. If several requests are to be made, applies to each '
      'request separately.')
  parser.add_argument(
      '-q', '--quiet', action='store_true',
      help='Suppress file contents logging output.')
  parser.add_argument(
      '--log-limit', type=int, default=None,
      help='Follow gitiles pages to fetch at least this many commits. By '
      'default, only the first page, with an unspecified number of commits, '
      'is fetched. Only for https://<hostname>/<repo>/+log/... gitiles '
      'requests.')
  parser.add_argument(
      '--log-start',
      help='If given, continue fetching the log by paging from this commit '
      'hash. This value can typically be taken from the json result of a '
      'previous call to log, which returns the next page start commit as '
      'the "next" key. Only for https://<hostname>/<repo>/+log/... gitiles '
      'requests.')
  parser.add_argument(
      '--accept-statuses', type=str, default='200',
      help='Comma-separated list of status codes to accept as "successful" '
      'HTTP responses.')
  return parser


if __name__ == '__main__':
  logging.basicConfig()
  logging.getLogger().setLevel(logging.INFO)
  sys.exit(main(sys.argv[1:]))