presubmit_diff.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. #!/usr/bin/env python3
  2. # Copyright (c) 2024 The Chromium Authors. All rights reserved.
  3. # Use of this source code is governed by a BSD-style license that can be
  4. # found in the LICENSE file.
  5. """Tool for generating a unified git diff outside of a git workspace.
  6. This is intended as a preprocessor for presubmit_support.py.
  7. """
  8. from __future__ import annotations
  9. import argparse
  10. import base64
  11. import concurrent.futures
  12. import os
  13. import platform
  14. import sys
  15. import gclient_utils
  16. from gerrit_util import (CreateHttpConn, ReadHttpResponse,
  17. MAX_CONCURRENT_CONNECTION)
  18. import subprocess2
  # Path passed to `git diff --no-index` in place of a side that has no file.
  DEV_NULL = "/dev/null"
  # Marker on which a diff is split: the text before its first occurrence is
  # treated as the file header, the remainder as the diff body.
  HEADER_DELIMITER = "@@"
  def fetch_content(host: str, repo: str, ref: str, file: str) -> bytes:
      """Downloads the contents of a file at a commit from Gitiles.

      The file is requested with `?format=text` and the response body is
      base64-decoded before it is returned. Both 200 and 404 are accepted as
      response statuses; a file that does not exist at the commit yields an
      empty bytes object.

      Args:
          host: Gerrit host.
          repo: Gerrit repo.
          ref: Gerrit commit.
          file: Path of file to fetch.

      Returns:
          Bytes of the file at the commit or an empty bytes object if the
          file does not exist at the commit.
      """
      gitiles_host = f"{host}.googlesource.com"
      gitiles_path = f"{repo}/+show/{ref}/{file}?format=text"
      connection = CreateHttpConn(gitiles_host, gitiles_path)
      encoded = ReadHttpResponse(connection, accept_statuses=[200, 404]).read()
      return base64.b64decode(encoded)
  def git_diff(src: str | None, dest: str | None, unified: int | None) -> str:
      """Runs `git diff --no-index` on two paths and returns its output.

      A missing (falsy) path is replaced by /dev/null. At least one of src
      or dest must be specified.

      Args:
          src: Source path.
          dest: Destination path.
          unified: Number of lines of context. If None, no -U flag is passed
              and git diff uses 3 as the default value.

      Returns:
          A string containing the git diff, decoded as UTF-8.
      """
      # git diff doesn't error out even if it's given a negative <n> value
      # (e.g., --unified=-3323, -U-3); it just ignores the value and treats
      # it as 0. Hence the <n> value is passed through without validation.
      context_flag = [] if unified is None else [f"-U{unified}"]
      old_path = src or DEV_NULL
      new_path = dest or DEV_NULL
      command = [
          "git", "diff", "--no-index", *context_flag, "--", old_path, new_path
      ]
      raw_output = subprocess2.capture(command)
      return raw_output.decode("utf-8")
  59. def _process_diff(diff: str, src_root: str, dst_root: str) -> str:
  60. """Adjust paths in the diff header so they're relative to the root.
  61. This also modifies paths on Windows to use forward slashes.
  62. """
  63. if not diff:
  64. return ""
  65. has_chunk_header = HEADER_DELIMITER in diff
  66. if has_chunk_header:
  67. header, body = diff.split(HEADER_DELIMITER, maxsplit=1)
  68. else:
  69. # Only the file mode changed.
  70. header = diff
  71. norm_src = src_root.rstrip(os.sep)
  72. norm_dst = dst_root.rstrip(os.sep)
  73. if platform.system() == "Windows":
  74. # Absolute paths on Windows use the format:
  75. # "a/C:\\abspath\\to\\file.txt"
  76. header = header.replace("\\\\", "\\")
  77. header = header.replace('"', "")
  78. header = header.replace(norm_src + "\\", "")
  79. header = header.replace(norm_dst + "\\", "")
  80. else:
  81. # Other systems use:
  82. # a/abspath/to/file.txt
  83. header = header.replace(norm_src, "")
  84. header = header.replace(norm_dst, "")
  85. if has_chunk_header:
  86. return header + HEADER_DELIMITER + body
  87. return header
  def _create_diff(host: str,
                   repo: str,
                   ref: str,
                   root: str,
                   file: str,
                   unified: int | None = None) -> str:
      """Diffs one local file against its content at a Gerrit commit.

      The content fetched for the commit is written into a temporary
      directory under the same relative path, then compared against the
      copy under root. A side that is unavailable (empty fetched content,
      or no file on disk) is passed to git_diff as None.

      Args:
          host: Gerrit host.
          repo: Gerrit repo.
          ref: Gerrit commit.
          root: Path of local directory containing modified files.
          file: File path relative to root.
          unified: Number of lines of context, forwarded to git_diff.

      Returns:
          The diff with its header paths processed by _process_diff.

      Raises:
          RuntimeError: If the file is missing in both the root and the
              repo.
      """
      local_path = os.path.join(root, file)
      new_file = local_path if os.path.exists(local_path) else None

      with gclient_utils.temporary_directory() as tmp_root:
          upstream_bytes = fetch_content(host, repo, ref, file)
          if upstream_bytes:
              old_file = os.path.join(tmp_root, file)
              # The relative path may contain directories that do not exist
              # yet inside the temporary root.
              os.makedirs(os.path.dirname(old_file), exist_ok=True)
              with open(old_file, "wb") as out:
                  out.write(upstream_bytes)
          else:
              old_file = None

          if not (old_file or new_file):
              raise RuntimeError(f"Could not access file {file} from {root} "
                                 f"or from {host}/{repo}:{ref}.")

          # The diff must be produced and processed before the temporary
          # directory is removed on exit from the with block.
          return _process_diff(git_diff(old_file, new_file, unified),
                               tmp_root, root)
  def create_diffs(host: str, repo: str, ref: str, root: str, files: list[str],
                   unified: int | None) -> dict[str, str]:
      """Calculates diffs of files in a directory against a commit.

      Each file is diffed on a worker thread. Results are gathered in the
      order the files were given, so the returned dict (and anything built
      by iterating over it) is the same from run to run regardless of which
      worker finishes first.

      Args:
          host: Gerrit host.
          repo: Gerrit repo.
          ref: Gerrit commit.
          root: Path of local directory containing modified files.
          files: List of file paths relative to root.
          unified: Number of lines of context. If None, git diff uses 3 as
              the default value.

      Returns:
          A dict mapping file paths to diffs, ordered like files.

      Raises:
          RuntimeError: If a file is missing in both the root and the repo.
      """
      with concurrent.futures.ThreadPoolExecutor(
              max_workers=MAX_CONCURRENT_CONNECTION) as executor:
          # Submit everything up front so the work still runs concurrently.
          pending = [(file,
                      executor.submit(_create_diff, host, repo, ref, root,
                                      file, unified)) for file in files]
          # Collect in submission order rather than completion order to keep
          # the output deterministic. result() re-raises any exception from
          # the worker.
          return {file: future.result() for file, future in pending}
  def main(argv):
      """Command-line entry point: diffs files and emits one combined diff.

      Args:
          argv: Command-line arguments, excluding the program name.

      Returns:
          0 on success, for use as the process exit code.
      """
      parser = argparse.ArgumentParser(
          usage="%(prog)s [options] <files...>",
          description="Makes a unified git diff against a Gerrit commit.",
      )
      parser.add_argument("--output", help="File to write the diff to.")
      parser.add_argument("--host", required=True, help="Gerrit host.")
      parser.add_argument("--repo", required=True, help="Gerrit repo.")
      parser.add_argument("--ref",
                          required=True,
                          help="Gerrit ref to diff against.")
      parser.add_argument("--root",
                          required=True,
                          help="Folder containing modified files.")
      parser.add_argument("-U",
                          "--unified",
                          required=False,
                          type=int,
                          help="generate diffs with <n> lines context",
                          metavar='<n>')
      parser.add_argument(
          "files",
          nargs="+",
          help="List of changed files. Paths are relative to the repo root.",
      )
      options = parser.parse_args(argv)

      diffs = create_diffs(options.host, options.repo, options.ref, options.root,
                           options.files, options.unified)
      # Files without changes produce an empty diff; leave them out.
      unified_diff = "\n".join([d for d in diffs.values() if d])

      if options.output:
          # The diff text was decoded from git's output as UTF-8, so write it
          # back as UTF-8. The platform's default encoding may be unable to
          # represent some characters.
          with open(options.output, "w", encoding="utf-8") as f:
              f.write(unified_diff)
      else:
          print(unified_diff)
      return 0
  if __name__ == "__main__":
      # Run only when executed as a script; main's return value becomes the
      # process exit status.
      exit_code = main(sys.argv[1:])
      sys.exit(exit_code)